From b30dcc5ab8eb2bd37f0ab742af1ec45113d54296 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 20 Jun 2017 09:07:21 +0100 Subject: COMPMID-345 - In-place computation for Activation Layer Change-Id: I25ebfccc3d3e758cc8164e0b33805c0bb303891a Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78226 Tested-by: Kaizen Reviewed-by: Pablo Tello Reviewed-by: Georgios Pinitas --- .../core/CL/kernels/CLActivationLayerKernel.h | 34 ++++++++++++++++++---- .../core/NEON/kernels/NEActivationLayerKernel.h | 17 +++++++---- .../runtime/CL/functions/CLActivationLayer.h | 11 ++++--- .../runtime/NEON/functions/NEActivationLayer.h | 11 ++++--- 4 files changed, 53 insertions(+), 20 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index 490e70544b..df22574de8 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -24,23 +24,45 @@ #ifndef __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ #define __ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ -#include "arm_compute/core/CL/ICLSimple3DKernel.h" +#include "arm_compute/core/CL/ICLKernel.h" namespace arm_compute { class ICLTensor; /** Interface for the activation layer kernel. 
*/ -class CLActivationLayerKernel : public ICLSimple3DKernel +class CLActivationLayerKernel : public ICLKernel { public: + /** Default constructor */ + CLActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerKernel(const CLActivationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + CLActivationLayerKernel(CLActivationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default; + /** Default destructor */ + ~CLActivationLayerKernel() = default; /** Set the input and output tensor. * - * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16. - * @param[out] output Destination tensor. Data type should match the input data type. - * @param[in] act_info Activation layer information. + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer information. 
*/ - void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); + void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + ICLTensor *_input; + ICLTensor *_output; }; } #endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h index 97f92d6a1e..539bca587a 100644 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h @@ -25,14 +25,14 @@ #define __ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H__ #include "arm_compute/core/FixedPoint.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" +#include "arm_compute/core/NEON/INEKernel.h" namespace arm_compute { class ITensor; /** Interface for the activation layer kernel. */ -class NEActivationLayerKernel : public INESimpleKernel +class NEActivationLayerKernel : public INEKernel { public: /** Constructor */ @@ -47,11 +47,14 @@ public: NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; /** Set the input and output tensor. * - * @param[in] input Source tensor. Data types supported: QS8/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] activation_info Activation layer information. + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] activation_info Activation layer information. 
*/ - void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); + void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); // Inherited methods overridden: void run(const Window &window) override; @@ -77,6 +80,8 @@ private: typename std::enable_if<std::is_same<T, qint8_t>::value, void>::type activation(const Window &window); private: + ITensor *_input; + ITensor *_output; ActivationFunctionExecutorPtr _func; ActivationLayerInfo _act_info; }; diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h index 6468c996a2..3028afb25b 100644 --- a/arm_compute/runtime/CL/functions/CLActivationLayer.h +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -41,11 +41,14 @@ class CLActivationLayer : public ICLSimpleFunction public: /** Set the input and output tensor. * - * @param[in] input Source tensor. Data types supported: F16, F32, U16, S16. - * @param[out] output Destination tensor. Data type should match the input data type. - * @param[in] act_info Activation layer parameters. + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer parameters. 
*/ - void configure(const ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); + void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info); }; } #endif /* __ARM_COMPUTE_CLACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 35366e16fb..b1a211553d 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -41,11 +41,14 @@ class NEActivationLayer : public INESimpleFunction public: /** Set the input and output tensor. * - * @param[in] input Source tensor. Data type supported: QS8/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] activation_info Activation layer parameters. + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: QS8/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] activation_info Activation layer parameters. */ - void configure(const ITensor *input, ITensor *output, ActivationLayerInfo activation_info); + void configure(ITensor *input, ITensor *output, ActivationLayerInfo activation_info); }; } #endif /* __ARM_COMPUTE_NEACTIVATIONLAYER_H__ */ -- cgit v1.2.1