From 1fd2c80692ed8ecefc4d8deb783564ad19eaf70c Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Tue, 16 Jun 2020 17:44:46 +0100
Subject: COMPMID-3375: Port NEActivationLayer functions/kernels to run on
 different tensors.

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I98782bb73e9dc0899ffb1796aca6f99714adea94
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3343
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/runtime/CPP/CPPScheduler.h             |  4 +-
 arm_compute/runtime/IOperator.h                    |  4 +-
 arm_compute/runtime/IScheduler.h                   |  2 +-
 arm_compute/runtime/NEON/INEOperator.h             |  4 +-
 .../runtime/NEON/functions/NEActivationLayer.h     | 46 ++++++++++++++++++++--
 arm_compute/runtime/NEON/functions/NELSTMLayer.h   | 14 +++----
 arm_compute/runtime/NEON/functions/NERNNLayer.h    |  6 +--
 .../runtime/NEON/functions/NEReshapeLayer.h        | 20 +++++++---
 arm_compute/runtime/OMP/OMPScheduler.h             |  2 +-
 arm_compute/runtime/SingleThreadScheduler.h        |  2 +-
 10 files changed, 75 insertions(+), 29 deletions(-)

(limited to 'arm_compute/runtime')
diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h
index 78ad43c2b4..2ccb094fdb 100644
--- a/arm_compute/runtime/CPP/CPPScheduler.h
+++ b/arm_compute/runtime/CPP/CPPScheduler.h
@@ -77,7 +77,7 @@ public:
      * @param[in] inputs  Vector that contains the input tensors.
      * @param[in] outputs Vector that contains the output tensors.
      */
-    void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs) override;
+    void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override;
 
 protected:
     /** Will run the workloads in parallel using num_threads
@@ -87,7 +87,7 @@ protected:
     void run_workloads(std::vector<Workload> &workloads) override;
 
 private:
-    void schedule_common(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs);
+    void schedule_common(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs);
     struct Impl;
     std::unique_ptr<Impl> _impl;
 };
diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h
index 110c935702..cf3c8b05a1 100644
--- a/arm_compute/runtime/IOperator.h
+++ b/arm_compute/runtime/IOperator.h
@@ -46,7 +46,7 @@ public:
      * @param[in] workspace Vector that contains the workspace tensors.
      *
      */
-    virtual void run(std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs, std::vector<OperatorTensors *> &workspace) = 0;
+    virtual void run(std::vector<InputTensor> inputs, std::vector<OutputTensor> outputs, std::vector<OperatorTensor> workspace) = 0;
     /** Prepare the function for executing
      *
      * Any one off pre-processing step required by the function is handled here
@@ -55,7 +55,7 @@ public:
      *
      * @note Prepare stage might not need all the function's buffers' backing memory to be available in order to execute
      */
-    virtual void prepare(std::vector<OperatorTensors *> constants) = 0;
+    virtual void prepare(std::vector<OperatorTensor> constants) = 0;
 
     /** Return the memory requirements required by the workspace
      */
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
index 02d0cef086..40da86fd10 100644
--- a/arm_compute/runtime/IScheduler.h
+++ b/arm_compute/runtime/IScheduler.h
@@ -157,7 +157,7 @@ public:
      * @param[in] inputs  Vector containing the input tensors.
      * @param[in] outputs Vector containing the output tensors.
      */
-    virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs) = 0;
+    virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) = 0;
 
     /** Execute all the passed workloads
      *
diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h
index 4467e6d5ab..2f6e18048d 100644
--- a/arm_compute/runtime/NEON/INEOperator.h
+++ b/arm_compute/runtime/NEON/INEOperator.h
@@ -54,8 +54,8 @@ public:
     INEOperator &operator=(INEOperator &&) = default;
 
     // Inherited methods overridden:
-    void run(std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs, std::vector<OperatorTensors *> &workspace) override final;
-    void prepare(std::vector<OperatorTensors *> constants) override final;
+    void run(std::vector<InputTensor> inputs, std::vector<OutputTensor> outputs, std::vector<OperatorTensor> workspace) override final;
+    void prepare(std::vector<OperatorTensor> constants) override final;
 
 protected:
     std::unique_ptr<INEKernel> _kernel;
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 95901dc2d8..8ac2dae911 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
 
 namespace arm_compute
 {
@@ -37,7 +38,7 @@ class ITensor;
  *
  * @note The function simulates an activation layer with the specified activation function.
  */
-class NEActivationLayer : public INESimpleFunctionNoBorder
+class NEActivationLayer : public IFunction
 {
 public:
     /** Constructor
@@ -45,14 +46,16 @@ public:
      * @param[in] ctx Runtime context to be used by the function
      */
     NEActivationLayer(IRuntimeContext *ctx = nullptr);
+    /** Destructor */
+    ~NEActivationLayer();
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NEActivationLayer(const NEActivationLayer &) = delete;
     /** Default move constructor */
-    NEActivationLayer(NEActivationLayer &&) = default;
+    NEActivationLayer(NEActivationLayer &&);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NEActivationLayer &operator=(const NEActivationLayer &) = delete;
     /** Default move assignment operator */
-    NEActivationLayer &operator=(NEActivationLayer &&) = default;
+    NEActivationLayer &operator=(NEActivationLayer &&);
     /** [NEActivationLayer snippet] **/
     /** Set the input and output tensor.
      *
@@ -75,6 +78,41 @@ public:
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
+};
+
+namespace experimental
+{
+/** Basic function to run @ref NEActivationLayerKernel */
+class NEActivationLayer : public INEOperator
+{
+public:
+    /** Set the input and output tensor info.
+     *
+     * @param[in]  input           Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+     * @param[out] output          Destination tensor info. Data type supported: same as @p input
+     * @param[in]  activation_info Activation layer parameters.
+     */
+    void configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer
+     *
+     * @param[in] input    Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+     * @param[in] output   Destination tensor info. Data type supported: same as @p input
+     * @param[in] act_info Activation layer information.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
 };
+} // namespace experimental
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
index e85e87b88e..64845115b8 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h
@@ -151,28 +151,28 @@ private:
     NEArithmeticAddition            _accum_input_gate1;
     NEArithmeticSubtractionKernel   _subtract_input_gate;
     NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate;
-    NEActivationLayerKernel         _activation_input_gate;
+    NEActivationLayer               _activation_input_gate;
     NEFullyConnectedLayer           _fully_connected_forget_gate;
     NEArithmeticAddition            _accum_forget_gate1;
     NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate;
-    NEActivationLayerKernel         _activation_forget_gate;
+    NEActivationLayer               _activation_forget_gate;
     NEFullyConnectedLayer           _fully_connected_cell_state;
     NEGEMM                          _gemm_cell_state1;
     NETransposeKernel               _transpose_cell_state;
     NEArithmeticAdditionKernel      _accum_cell_state1;
     NEArithmeticAdditionKernel      _accum_cell_state2;
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1;
-    NEActivationLayerKernel         _activation_cell_state;
-    NEActivationLayerKernel         _cell_clip;
+    NEActivationLayer               _activation_cell_state;
+    NEActivationLayer               _cell_clip;
     NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2;
     NEFullyConnectedLayer           _fully_connected_output;
     NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state1;
     NEArithmeticAddition            _accum_output1;
-    NEActivationLayerKernel         _activation_output;
-    NEActivationLayerKernel         _activation_output_state;
+    NEActivationLayer               _activation_output;
+    NEActivationLayer               _activation_output_state;
     NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state2;
     NEFullyConnectedLayer           _fully_connected_output_state;
-    NEActivationLayerKernel         _projection_clip;
+    NEActivationLayer               _projection_clip;
     NECopyKernel                    _copy_cell_state;
     NECopyKernel                    _copy_output;
     NEConcatenateLayer              _concat_scratch_buffer;
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index 0bfb905e19..db4134fd2d 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,11 +24,11 @@
 #ifndef ARM_COMPUTE_NERNNLAYER_H
 #define ARM_COMPUTE_NERNNLAYER_H
 
-#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECopyKernel.h"
 
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
 
@@ -85,7 +85,7 @@ private:
     MemoryGroup                _memory_group;
     NEGEMM                     _gemm_state_f;
     NEArithmeticAdditionKernel _add_kernel;
-    NEActivationLayerKernel    _activation_kernel;
+    NEActivationLayer          _activation;
     NEFullyConnectedLayer      _fully_connected;
     NECopyKernel               _copy_kernel;
     Tensor                     _fully_connected_out;
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index 5a296a776d..43605584e0 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -39,6 +39,18 @@ class ITensor;
 class NEReshapeLayer : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEReshapeLayer();
+    /** Default Destructor */
+    ~NEReshapeLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReshapeLayer(const NEReshapeLayer &) = delete;
+    /** Default move constructor */
+    NEReshapeLayer(NEReshapeLayer &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReshapeLayer &operator=(const NEReshapeLayer &) = delete;
+    /** Default move assignment operator */
+    NEReshapeLayer &operator=(NEReshapeLayer &&);
     /** Initialise the kernel's inputs and outputs
      *
      * @param[in]  input  Input tensor. Data type supported: All
@@ -59,12 +71,8 @@ public:
     void run() override;
 
 private:
-    const ITensor *_input
-    {
-        nullptr
-    };
-    ITensor                              *_output{ nullptr };
-    std::unique_ptr<NEReshapeLayerKernel> _kernel{ nullptr };
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 namespace experimental
diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h
index 8ed1705a97..b7c186a838 100644
--- a/arm_compute/runtime/OMP/OMPScheduler.h
+++ b/arm_compute/runtime/OMP/OMPScheduler.h
@@ -66,7 +66,7 @@ public:
      * @param[in] inputs  Vector containing the input tensors.
      * @param[in] outputs Vector containing the output tensors.
      */
-    void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs) override;
+    void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override;
 
 protected:
     /** Execute all the passed workloads
diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h
index 8094758249..8a69a5be15 100644
--- a/arm_compute/runtime/SingleThreadScheduler.h
+++ b/arm_compute/runtime/SingleThreadScheduler.h
@@ -57,7 +57,7 @@ public:
      * @param[in] inputs  Vector containing the input tensors.
      * @param[in] outputs Vector containing the output tensors.
      */
-    void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs) override;
+    void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override;
 
 protected:
     /** Will run the workloads sequentially and in order.
-- 
cgit v1.2.1