From dde9ec96f471127e5b6d8dfaeffce024b6326f1a Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Tue, 13 Feb 2018 15:24:04 +0000 Subject: COMPMID-909: Enabling in-place computation for batchnormalization and activation at graph level Change-Id: I84d4a212629b21794451ab5fb5c5b187b5e28f98 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120127 Reviewed-by: Georgios Pinitas Tested-by: Jenkins --- arm_compute/core/CL/kernels/CLActivationLayerKernel.h | 1 + .../core/CL/kernels/CLBatchNormalizationLayerKernel.h | 1 + arm_compute/graph/INode.h | 13 ++++++++++++- arm_compute/graph/NodeContext.h | 4 ++-- arm_compute/graph/nodes/BatchNormalizationLayer.h | 1 + arm_compute/runtime/CL/functions/CLActivationLayer.h | 4 ++-- .../runtime/CL/functions/CLBatchNormalizationLayer.h | 2 +- arm_compute/runtime/NEON/functions/NEActivationLayer.h | 4 ++-- .../runtime/NEON/functions/NEBatchNormalizationLayer.h | 2 +- 9 files changed, 23 insertions(+), 9 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h index fbe450c4f2..c6d8f96a87 100644 --- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h @@ -73,6 +73,7 @@ public: private: ICLTensor *_input; ICLTensor *_output; + bool _run_in_place; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h index fee5dd3bae..e9fd564fbd 100644 --- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -96,6 +96,7 @@ private: const ICLTensor *_beta; const ICLTensor *_gamma; float _epsilon; + bool _run_in_place; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h index 56b50b9424..1969423074 100644 --- a/arm_compute/graph/INode.h +++ b/arm_compute/graph/INode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,6 +58,16 @@ public: * @return The updated target hint */ TargetHint override_target_hint(TargetHint target_hint) const; + /** Method to check if the node supports in-place operations. + * + * @return True if the node supports in-place operations, false otherwise. + */ + virtual bool supports_in_place() const; + /** Set the value of the _supports_in_place attribute. + * + * @param[in] value Boolean value to assign to _supports_in_place. + */ + void set_supports_in_place(bool value); protected: /** Interface to be implement that override the hints @@ -70,6 +80,7 @@ protected: protected: TargetHint _target_hint{ TargetHint::DONT_CARE }; + bool _supports_in_place{ false }; }; } // namespace graph } // namespace arm_compute diff --git a/arm_compute/graph/NodeContext.h b/arm_compute/graph/NodeContext.h index bc90f217a7..17ae49740b 100644 --- a/arm_compute/graph/NodeContext.h +++ b/arm_compute/graph/NodeContext.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -60,7 +60,7 @@ public: * @param[in] input Input to add */ void add_input(arm_compute::ITensor *input); - /** Adds and output to the context + /** Adds an output to the context * * @param[in] output Output to add */ diff --git a/arm_compute/graph/nodes/BatchNormalizationLayer.h b/arm_compute/graph/nodes/BatchNormalizationLayer.h index 266c3905d8..abbf09a54e 100644 --- a/arm_compute/graph/nodes/BatchNormalizationLayer.h +++ b/arm_compute/graph/nodes/BatchNormalizationLayer.h @@ -51,6 +51,7 @@ public: BatchNormalizationLayer(AccessorType &&mean, AccessorType &&var, AccessorType &&gamma, AccessorType &&beta, float epsilon, ActivationLayerInfo act_info = ActivationLayerInfo()) : _mean(std::move(mean)), _var(std::move(var)), _gamma(std::move(gamma)), _beta(std::move(beta)), _epsilon(epsilon), _act_info(act_info) { + set_supports_in_place(true); } // Inherited methods overriden: diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h index 5b99abc5fb..8cb41d225a 100644 --- a/arm_compute/runtime/CL/functions/CLActivationLayer.h +++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,7 @@ class CLActivationLayer : public ICLSimpleFunction public: /** Set the input and output tensor. * - * @note If the output tensor is a nullptr, the activation function will be performed in-place + * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result * of the activation function. Data types supported: QS8/QS16/F16/F32. diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h index 3d5145a697..39f567d6a3 100644 --- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -46,7 +46,7 @@ public: CLBatchNormalizationLayer(); /** Set the input and output tensors. * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index 007c53a0a8..31714216fb 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,7 @@ class NEActivationLayer : public INESimpleFunction public: /** Set the input and output tensor. * - * @note If the output tensor is a nullptr, the activation function will be performed in-place + * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result * of the activation function. Data types supported: QS8/QS16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 242144c987..85c62663ab 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -46,7 +46,7 @@ public: NEBatchNormalizationLayer(); /** Set the input and output tensors. * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place + * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. -- cgit v1.2.1