From 1fd2c80692ed8ecefc4d8deb783564ad19eaf70c Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 16 Jun 2020 17:44:46 +0100 Subject: COMPMID-3375: Port NEActivationLayer functions/kernels to run on different tensors. Signed-off-by: Georgios Pinitas Change-Id: I98782bb73e9dc0899ffb1796aca6f99714adea94 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3343 Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/core/CPP/ICPPKernel.h | 2 +- .../core/NEON/kernels/NEActivationLayerKernel.h | 24 +++++------ .../core/NEON/kernels/NEReshapeLayerKernel.h | 4 +- arm_compute/core/experimental/Types.h | 50 ++++++++++++++++------ arm_compute/runtime/CPP/CPPScheduler.h | 4 +- arm_compute/runtime/IOperator.h | 4 +- arm_compute/runtime/IScheduler.h | 2 +- arm_compute/runtime/NEON/INEOperator.h | 4 +- .../runtime/NEON/functions/NEActivationLayer.h | 46 ++++++++++++++++++-- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 14 +++--- arm_compute/runtime/NEON/functions/NERNNLayer.h | 6 +-- .../runtime/NEON/functions/NEReshapeLayer.h | 20 ++++++--- arm_compute/runtime/OMP/OMPScheduler.h | 2 +- arm_compute/runtime/SingleThreadScheduler.h | 2 +- 14 files changed, 126 insertions(+), 58 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h index 21f6ab714a..3ec54756a0 100644 --- a/arm_compute/core/CPP/ICPPKernel.h +++ b/arm_compute/core/CPP/ICPPKernel.h @@ -84,7 +84,7 @@ public: * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) * @param[in] info Info about executing thread and CPU. 
*/ - virtual void run_op(const std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs, const Window &window, const ThreadInfo &info) + virtual void run_op(const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(inputs, outputs, window, info); } diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 82103b988b..399afa63c6 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -33,6 +33,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the activation layer kernel. */ @@ -57,12 +58,12 @@ public: * * @note If the output tensor is a nullptr, the activation function will be performed in-place * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[out] output Destination tensor info. Data type supported: same as @p input * @param[in] activation_info Activation layer information. */ - void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); + void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info); /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel * * @param[in] input Source tensor info. 
In case of @p output tensor info = nullptr, this tensor will store the result @@ -75,7 +76,8 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; + void run_op(const std::vector &inputs, const std::vector &outputs, + const Window &window, const ThreadInfo &info) override; private: using ActivationFunction = ActivationLayerInfo::ActivationFunction; @@ -83,36 +85,34 @@ private: * * @param[in] window Region on which to execute the kernel. */ - using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const Window &window); + using ActivationFunctionExecutorPtr = void (NEActivationLayerKernel::*)(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template typename std::enable_if::value, void>::type - activation(const Window &window); + activation(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template - typename std::enable_if::value, void>::type activation(const Window &window); + typename std::enable_if::value, void>::type activation(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. * * @param[in] window Region on which to execute the kernel */ template - typename std::enable_if::value, void>::type activation(const Window &window); + typename std::enable_if::value, void>::type activation(const ITensor *src, ITensor *dst, const Window &window); /** Function to apply an activation function on a tensor. 
* * @param[in] window Region on which to execute the kernel */ template - typename std::enable_if::value, void>::type activation(const Window &window); + typename std::enable_if::value, void>::type activation(const ITensor *src, ITensor *dst, const Window &window); private: - ITensor *_input; - ITensor *_output; ActivationFunctionExecutorPtr _func; ActivationLayerInfo _act_info; }; diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h index 6f888e0914..7a4dce128d 100644 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h @@ -57,8 +57,8 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output); // Inherited methods overridden: - void run_op(const std::vector &inputs, std::vector &outputs, const Window &window, const ThreadInfo &info) override; + void run_op(const std::vector &inputs, const std::vector &outputs, + const Window &window, const ThreadInfo &info) override; }; - } // namespace arm_compute #endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index 6043db9ff4..2b5591872a 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -36,21 +36,43 @@ class ITensor; /** Memory type */ enum class TensorType { - ACL_SRC = 0, - ACL_SRC_0 = 0, - ACL_SRC_1 = 1, - ACL_SRC_2 = 2, - ACL_DST = 30, - ACL_DST_0 = 30, - ACL_DST_1 = 31, - ACL_INT = 50, - ACL_INT_0 = 50, - ACL_INT_1 = 51, - ACL_INT_2 = 52 + ACL_UNKNOWN = -1, + ACL_SRC = 0, + ACL_SRC_0 = 0, + ACL_SRC_1 = 1, + ACL_SRC_2 = 2, + ACL_DST = 30, + ACL_DST_0 = 30, + ACL_DST_1 = 31, + ACL_INT = 50, + ACL_INT_0 = 50, + ACL_INT_1 = 51, + ACL_INT_2 = 52 }; -using InputOperatorTensors = std::pair; -using OutputOperatorTensors = std::pair; -using OperatorTensors = OutputOperatorTensors; + +/** Input tensor aggregate */ +struct InputTensor +{ + 
InputTensor(TensorType type, const ITensor *tensor) + : type(type), tensor(tensor) + { + } + + TensorType type{ TensorType::ACL_UNKNOWN }; + const ITensor *tensor{ nullptr }; +}; +/** Output tensor aggregate */ +struct OutputTensor +{ + OutputTensor(TensorType type, ITensor *tensor) + : type(type), tensor(tensor) + { + } + + TensorType type{ TensorType::ACL_UNKNOWN }; + ITensor *tensor{ nullptr }; +}; +using OperatorTensor = OutputTensor; namespace experimental { diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h index 78ad43c2b4..2ccb094fdb 100644 --- a/arm_compute/runtime/CPP/CPPScheduler.h +++ b/arm_compute/runtime/CPP/CPPScheduler.h @@ -77,7 +77,7 @@ public: * @param[in] inputs Vector that contains the input tensors. * @param[in] outputs Vector that contains the output tensors. */ - void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override; protected: /** Will run the workloads in parallel using num_threads * * @param[in] workloads The workloads to run */ void run_workloads(std::vector<Workload> &workloads) override; private: - void schedule_common(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs); + void schedule_common(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs); struct Impl; std::unique_ptr<Impl> _impl; }; diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h index 110c935702..cf3c8b05a1 100644 --- a/arm_compute/runtime/IOperator.h +++ b/arm_compute/runtime/IOperator.h @@ -46,7 +46,7 @@ public: * @param[in] workspace Vector that contains the workspace tensors. 
* */ - virtual void run(std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs, std::vector<OperatorTensors *> &workspace) = 0; + virtual void run(std::vector<InputTensor> inputs, std::vector<OutputTensor> outputs, std::vector<OperatorTensor> workspace) = 0; /** Prepare the function for executing * * Any one off pre-processing step required by the function is handled here * * @note Prepare stage might not need all the function's buffers' backing memory to be available in order to execute */ - virtual void prepare(std::vector<OperatorTensors *> constants) = 0; + virtual void prepare(std::vector<OperatorTensor> constants) = 0; /** Return the memory requirements required by the workspace */ diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index 02d0cef086..40da86fd10 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -157,7 +157,7 @@ public: * @param[in] inputs Vector containing the input tensors. * @param[in] outputs Vector containing the output tensors. */ - virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs) = 0; + virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) = 0; /** Execute all the passed workloads * diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index 4467e6d5ab..2f6e18048d 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -54,8 +54,8 @@ public: INEOperator &operator=(INEOperator &&) = default; // Inherited methods overridden: - void run(std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs, std::vector<OperatorTensors *> &workspace) override final; - void prepare(std::vector<OperatorTensors *> constants) override final; + void run(std::vector<InputTensor> inputs, std::vector<OutputTensor> outputs, std::vector<OperatorTensor> workspace) override final; + void prepare(std::vector<OperatorTensor> constants) override final; protected: std::unique_ptr<INEKernel> _kernel; diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h 
b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 95901dc2d8..8ac2dae911 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INEOperator.h" namespace arm_compute { @@ -37,7 +38,7 @@ class ITensor; * * @note The function simulates an activation layer with the specified activation function. */ -class NEActivationLayer : public INESimpleFunctionNoBorder +class NEActivationLayer : public IFunction { public: /** Constructor @@ -45,14 +46,16 @@ public: * @param[in] ctx Runtime context to be used by the function */ NEActivationLayer(IRuntimeContext *ctx = nullptr); + /** Destructor */ + ~NEActivationLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer(const NEActivationLayer &) = delete; /** Default move constructor */ - NEActivationLayer(NEActivationLayer &&) = default; + NEActivationLayer(NEActivationLayer &&); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer &operator=(const NEActivationLayer &) = delete; /** Default move assignment operator */ - NEActivationLayer &operator=(NEActivationLayer &&) = default; + NEActivationLayer &operator=(NEActivationLayer &&); /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. 
* @@ -75,6 +78,41 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden + void run() override; + +private: + struct Impl; + std::unique_ptr _impl; +}; + +namespace experimental +{ +/** Basic function to run @ref NEActivationLayerKernel */ +class NEActivationLayer : public INEOperator +{ +public: + /** Set the input and output tensor. + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[out] output Destination tensor info. Data type supported: same as @p input + * @param[in] activation_info Activation layer parameters. + */ + void configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info); + /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer + * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: same as @p input + * @param[in] act_info Activation layer information. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); + + // Inherited methods overridden: + MemoryRequirements workspace() const override; }; +} // namespace experimental } // namespace arm_compute #endif /* ARM_COMPUTE_NEACTIVATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index e85e87b88e..64845115b8 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -151,28 +151,28 @@ private: NEArithmeticAddition _accum_input_gate1; NEArithmeticSubtractionKernel _subtract_input_gate; NEPixelWiseMultiplicationKernel _pixelwise_mul_input_gate; - NEActivationLayerKernel _activation_input_gate; + NEActivationLayer _activation_input_gate; NEFullyConnectedLayer _fully_connected_forget_gate; NEArithmeticAddition _accum_forget_gate1; NEPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate; - NEActivationLayerKernel _activation_forget_gate; + NEActivationLayer _activation_forget_gate; NEFullyConnectedLayer _fully_connected_cell_state; NEGEMM _gemm_cell_state1; NETransposeKernel _transpose_cell_state; NEArithmeticAdditionKernel _accum_cell_state1; NEArithmeticAdditionKernel _accum_cell_state2; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1; - NEActivationLayerKernel _activation_cell_state; - NEActivationLayerKernel _cell_clip; + NEActivationLayer _activation_cell_state; + NEActivationLayer _cell_clip; NEPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2; NEFullyConnectedLayer _fully_connected_output; NEPixelWiseMultiplicationKernel _pixelwise_mul_output_state1; NEArithmeticAddition _accum_output1; - NEActivationLayerKernel _activation_output; - NEActivationLayerKernel _activation_output_state; + NEActivationLayer _activation_output; + NEActivationLayer _activation_output_state; NEPixelWiseMultiplicationKernel 
_pixelwise_mul_output_state2; NEFullyConnectedLayer _fully_connected_output_state; - NEActivationLayerKernel _projection_clip; + NEActivationLayer _projection_clip; NECopyKernel _copy_cell_state; NECopyKernel _copy_output; NEConcatenateLayer _concat_scratch_buffer; diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index 0bfb905e19..db4134fd2d 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,11 +24,11 @@ #ifndef ARM_COMPUTE_NERNNLAYER_H #define ARM_COMPUTE_NERNNLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NECopyKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" @@ -85,7 +85,7 @@ private: MemoryGroup _memory_group; NEGEMM _gemm_state_f; NEArithmeticAdditionKernel _add_kernel; - NEActivationLayerKernel _activation_kernel; + NEActivationLayer _activation; NEFullyConnectedLayer _fully_connected; NECopyKernel _copy_kernel; Tensor _fully_connected_out; diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index 5a296a776d..43605584e0 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -39,6 +39,18 @@ class ITensor; class NEReshapeLayer : public IFunction { public: + /** Default Constructor */ + NEReshapeLayer(); + /** Default Destructor */ + ~NEReshapeLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) 
*/ + NEReshapeLayer(const NEReshapeLayer &) = delete; + /** Default move constructor */ + NEReshapeLayer(NEReshapeLayer &&); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEReshapeLayer &operator=(const NEReshapeLayer &) = delete; + /** Default move assignment operator */ + NEReshapeLayer &operator=(NEReshapeLayer &&); /** Initialise the kernel's inputs and outputs * * @param[in] input Input tensor. Data type supported: All @@ -59,12 +71,8 @@ public: void run() override; private: - const ITensor *_input - { - nullptr - }; - ITensor *_output{ nullptr }; - std::unique_ptr _kernel{ nullptr }; + struct Impl; + std::unique_ptr _impl; }; namespace experimental diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h index 8ed1705a97..b7c186a838 100644 --- a/arm_compute/runtime/OMP/OMPScheduler.h +++ b/arm_compute/runtime/OMP/OMPScheduler.h @@ -66,7 +66,7 @@ public: * @param[in] inputs Vector containing the input tensors. * @param[in] outputs Vector containing the output tensors. */ - void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector &inputs, std::vector &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector &inputs, const std::vector &outputs) override; protected: /** Execute all the passed workloads diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h index 8094758249..8a69a5be15 100644 --- a/arm_compute/runtime/SingleThreadScheduler.h +++ b/arm_compute/runtime/SingleThreadScheduler.h @@ -57,7 +57,7 @@ public: * @param[in] inputs Vector containing the input tensors. * @param[in] outputs Vector containing the output tensors. 
*/ - void schedule_op(ICPPKernel *kernel, const Hints &hints, std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs) override; + void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override; protected: /** Will run the workloads sequentially and in order. -- cgit v1.2.1