From dde9ec96f471127e5b6d8dfaeffce024b6326f1a Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Tue, 13 Feb 2018 15:24:04 +0000
Subject: COMPMID-909: Enabling in-place computation for batch normalization and
 activation at graph level

Change-Id: I84d4a212629b21794451ab5fb5c5b187b5e28f98
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120127
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/CL/kernels/CLActivationLayerKernel.h       |  1 +
 .../core/CL/kernels/CLBatchNormalizationLayerKernel.h       |  1 +
 arm_compute/graph/INode.h                                   | 13 ++++++++++++-
 arm_compute/graph/NodeContext.h                             |  4 ++--
 arm_compute/graph/nodes/BatchNormalizationLayer.h           |  1 +
 arm_compute/runtime/CL/functions/CLActivationLayer.h        |  4 ++--
 .../runtime/CL/functions/CLBatchNormalizationLayer.h        |  2 +-
 arm_compute/runtime/NEON/functions/NEActivationLayer.h      |  4 ++--
 .../runtime/NEON/functions/NEBatchNormalizationLayer.h      |  2 +-
 9 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
index fbe450c4f2..c6d8f96a87 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -73,6 +73,7 @@ public:
 private:
     ICLTensor *_input;
     ICLTensor *_output;
+    bool       _run_in_place;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index fee5dd3bae..e9fd564fbd 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -96,6 +96,7 @@ private:
     const ICLTensor *_beta;
     const ICLTensor *_gamma;
     float            _epsilon;
+    bool             _run_in_place;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/graph/INode.h b/arm_compute/graph/INode.h
index 56b50b9424..1969423074 100644
--- a/arm_compute/graph/INode.h
+++ b/arm_compute/graph/INode.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,16 @@ public:
      * @return The updated target hint
      */
     TargetHint override_target_hint(TargetHint target_hint) const;
+    /** Method to check if the node supports in-place operations.
+     *
+     * @return True if the node supports in-place operations, false otherwise.
+     */
+    virtual bool supports_in_place() const;
+    /** Set whether the node supports in-place operations.
+     *
+     * @param[in] value True if the node supports in-place operations, false otherwise.
+     */
+    void set_supports_in_place(bool value);
 
 protected:
     /** Interface to be implement that override the hints
@@ -70,6 +80,7 @@ protected:
 
 protected:
     TargetHint _target_hint{ TargetHint::DONT_CARE };
+    bool       _supports_in_place{ false };
 };
 } // namespace graph
 } // namespace arm_compute
diff --git a/arm_compute/graph/NodeContext.h b/arm_compute/graph/NodeContext.h
index bc90f217a7..17ae49740b 100644
--- a/arm_compute/graph/NodeContext.h
+++ b/arm_compute/graph/NodeContext.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,7 +60,7 @@ public:
      * @param[in] input Input to add
      */
     void add_input(arm_compute::ITensor *input);
-    /** Adds and output to the context
+    /** Adds an output to the context
      *
      * @param[in] output Output to add
      */
diff --git a/arm_compute/graph/nodes/BatchNormalizationLayer.h b/arm_compute/graph/nodes/BatchNormalizationLayer.h
index 266c3905d8..abbf09a54e 100644
--- a/arm_compute/graph/nodes/BatchNormalizationLayer.h
+++ b/arm_compute/graph/nodes/BatchNormalizationLayer.h
@@ -51,6 +51,7 @@ public:
     BatchNormalizationLayer(AccessorType &&mean, AccessorType &&var, AccessorType &&gamma, AccessorType &&beta, float epsilon, ActivationLayerInfo act_info = ActivationLayerInfo())
         : _mean(std::move(mean)), _var(std::move(var)), _gamma(std::move(gamma)), _beta(std::move(beta)), _epsilon(epsilon), _act_info(act_info)
     {
+        set_supports_in_place(true);
     }
 
     // Inherited methods overriden:
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 5b99abc5fb..8cb41d225a 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,7 +41,7 @@ class CLActivationLayer : public ICLSimpleFunction
 public:
     /** Set the input and output tensor.
      *
-     * @note If the output tensor is a nullptr, the activation function will be performed in-place
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
      *
      * @param[in, out] input    Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
      *                          of the activation function. Data types supported: QS8/QS16/F16/F32.
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index 3d5145a697..39f567d6a3 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -46,7 +46,7 @@ public:
     CLBatchNormalizationLayer();
     /** Set the input and output tensors.
      *
-     * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+     * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
      *
      * @param[in, out] input    Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
      *                          3 lower dimensions represent a single input with dimensions [width, height, FM].
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 007c53a0a8..31714216fb 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,7 +41,7 @@ class NEActivationLayer : public INESimpleFunction
 public:
     /** Set the input and output tensor.
      *
-     * @note If the output tensor is a nullptr, the activation function will be performed in-place
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
      *
      * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
      *                                 of the activation function. Data types supported: QS8/QS16/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index 242144c987..85c62663ab 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -46,7 +46,7 @@ public:
     NEBatchNormalizationLayer();
     /** Set the input and output tensors.
      *
-     * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+     * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
      *
      * @param[in, out] input    Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
      *                          3 lower dimensions represent a single input with dimensions [width, height, FM].
-- 
cgit v1.2.1