From 80feed5193de6b10d8ab65b42fb988c241c5d09d Mon Sep 17 00:00:00 2001
From: Manuel Bottini <manuel.bottini@arm.com>
Date: Wed, 3 Jun 2020 13:20:41 +0100
Subject: COMPMID-3479: Perform in-place computations in
 NEElementwiseUnaryKernel

Change-Id: I2102bfe95c2c2335bb587842f9d860cf939a9026
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3315
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 .../core/NEON/kernels/NEElementwiseUnaryKernel.h   |  8 +--
 .../NEON/functions/NEElementwiseUnaryLayer.h       | 18 +++----
 src/graph/mutators/InPlaceOperationMutator.cpp     |  1 +
 tests/validation/NEON/ElementwiseNegation.cpp      | 34 +++++++------
 .../validation/fixtures/ElementWiseUnaryFixture.h  | 58 +++++++++++++++-------
 5 files changed, 73 insertions(+), 46 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h
index 9a41cecf19..02c390b6ba 100644
--- a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h
+++ b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h
@@ -57,11 +57,11 @@ public:
     /** Default destructor */
     ~NEElementwiseUnaryKernel() = default;
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel
+    /** Function to configure the @ref NEElementwiseUnaryKernel
      *
-     * @param[in] op     Arithmetic operation to be executed.
-     * @param[in] input  First tensor input. Data types supported: F16/F32.
-     * @param[in] output Output tensor. Data types supported: Same as @p input.
+     * @param[in]  op     Arithmetic operation to be executed.
+     * @param[in]  input  Input tensor. Data types supported: F16/F32.
+     * @param[out] output Output tensor. Data types supported: Same as @p input.
      */
     void configure(ElementWiseUnary op, const ITensor *input, ITensor *output);
 
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
index 094f875b35..1fd24887a5 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,14 +24,14 @@
 #ifndef ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
 #define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
 
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 class ITensor;
 
 /** Basic function to perform inverse square root on an input tensor. */
-class NERsqrtLayer : public INESimpleFunction
+class NERsqrtLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
@@ -51,7 +51,7 @@ public:
 };
 
 /** Basic function to perform exponential on an input tensor. */
-class NEExpLayer : public INESimpleFunction
+class NEExpLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
@@ -71,7 +71,7 @@ public:
 };
 
 /** Basic function to negate an input tensor. */
-class NENegLayer : public INESimpleFunction
+class NENegLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
@@ -91,7 +91,7 @@ public:
 };
 
 /** Basic function to compute the natural logarithm of an input tensor. */
-class NELogLayer : public INESimpleFunction
+class NELogLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
@@ -111,7 +111,7 @@ public:
 };
 
 /** Basic function to compute the absolute value of an input tensor. */
-class NEAbsLayer : public INESimpleFunction
+class NEAbsLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
@@ -131,7 +131,7 @@ public:
 };
 
 /** Basic function to compute the round value elementwise of an input tensor. */
-class NERoundLayer : public INESimpleFunction
+class NERoundLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
@@ -151,7 +151,7 @@ public:
 };
 
 /** Basic function to compute the sine of an input tensor. */
-class NESinLayer : public INESimpleFunction
+class NESinLayer : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialize the function
diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp
index ba80d8a26a..394dba84ff 100644
--- a/src/graph/mutators/InPlaceOperationMutator.cpp
+++ b/src/graph/mutators/InPlaceOperationMutator.cpp
@@ -47,6 +47,7 @@ void InPlaceOperationMutator::mutate(Graph &g)
         NodeType::ActivationLayer,
         NodeType::BatchNormalizationLayer,
         NodeType::EltwiseLayer,
+        NodeType::UnaryEltwiseLayer,
         NodeType::PrintLayer
     };
 
diff --git a/tests/validation/NEON/ElementwiseNegation.cpp b/tests/validation/NEON/ElementwiseNegation.cpp
index 7e7c838472..8e69d22002 100644
--- a/tests/validation/NEON/ElementwiseNegation.cpp
+++ b/tests/validation/NEON/ElementwiseNegation.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,19 +70,21 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Sma
 }
 
 template <typename T>
-using NENegLayerFixture = NegValidationFixture<Tensor, Accessor, NENegLayer, T>;
+using NENegLayerFixture = NegValidationInPlaceFixture<Tensor, Accessor, NENegLayer, T>;
 
 TEST_SUITE(Float)
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                     DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+                                                                                                             framework::dataset::make("DataType", DataType::F16)),
+                                                                                                     framework::dataset::make("InPlace", { true, false })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fp16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                   DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                           framework::dataset::make("DataType", DataType::F16)),
+                                                                                                   framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fp16);
@@ -92,15 +94,17 @@ TEST_SUITE_END() // FP16
 #endif           // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(),
+                                                                                                        framework::dataset::make("DataType", DataType::F32)),
+                                                                                                framework::dataset::make("InPlace", { true, false })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fp32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                    DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                            framework::dataset::make("DataType", DataType::F32)),
+                                                                                                    framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fp32);
@@ -110,15 +114,17 @@ TEST_SUITE_END() // Float
 
 TEST_SUITE(Integer)
 TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<int32_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                  DataType::S32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(),
+                                                                                                          framework::dataset::make("DataType", DataType::S32)),
+                                                                                                  framework::dataset::make("InPlace", { true, false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                      DataType::S32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NENegLayerFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+                                                                                                              framework::dataset::make("DataType", DataType::S32)),
+                                                                                                      framework::dataset::make("InPlace", { false })))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/fixtures/ElementWiseUnaryFixture.h b/tests/validation/fixtures/ElementWiseUnaryFixture.h
index 3f6d5b3cb3..b11b802d11 100644
--- a/tests/validation/fixtures/ElementWiseUnaryFixture.h
+++ b/tests/validation/fixtures/ElementWiseUnaryFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,10 +44,10 @@ class ElementWiseUnaryValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape input_shape, DataType input_data_type, ElementWiseUnary op)
+    void setup(TensorShape input_shape, DataType input_data_type, bool in_place, ElementWiseUnary op)
     {
         _op        = op;
-        _target    = compute_target(input_shape, input_data_type);
+        _target    = compute_target(input_shape, input_data_type, in_place);
         _reference = compute_reference(input_shape, input_data_type);
     }
 
@@ -115,25 +115,27 @@ protected:
         }
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, bool in_place)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type);
         TensorType dst = create_tensor<TensorType>(shape, data_type);
 
+        TensorType *actual_dst = in_place ? &src : &dst;
+
         // Create and configure function
         FunctionType elwiseunary_layer;
-
-        elwiseunary_layer.configure(&src, &dst);
+        elwiseunary_layer.configure(&src, actual_dst);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
         src.allocator()->allocate();
-        dst.allocator()->allocate();
         ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        if(!in_place)
+        {
+            ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+            dst.allocator()->allocate();
+            ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+        }
 
         // Fill tensors
         fill(AccessorType(src), 0, data_type);
@@ -141,7 +143,14 @@ protected:
         // Compute function
         elwiseunary_layer.run();
 
-        return dst;
+        if(in_place)
+        {
+            return src;
+        }
+        else
+        {
+            return dst;
+        }
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type)
@@ -167,7 +176,7 @@ public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type)
     {
-        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::RSQRT);
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::RSQRT);
     }
 };
 
@@ -178,7 +187,7 @@ public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type)
     {
-        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::EXP);
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::EXP);
     }
 };
 
@@ -189,7 +198,18 @@ public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type)
     {
-        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::NEG);
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::NEG);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class NegValidationInPlaceFixture : public ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T> // Negation fixture whose test cases choose in-place or out-of-place execution
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape, DataType data_type, bool in_place) // in_place: when true the base fixture configures the function with the source tensor as its destination
+    {
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, in_place, ElementWiseUnary::NEG); // operation is fixed to NEG; only in_place is forwarded from the dataset
     }
 };
 
@@ -200,7 +220,7 @@ public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type)
     {
-        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::LOG);
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::LOG);
     }
 };
 
@@ -211,7 +231,7 @@ public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type)
     {
-        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::ABS);
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ABS);
     }
 };
 
@@ -222,7 +242,7 @@ public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type)
     {
-        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::SIN);
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::SIN);
     }
 };
 
@@ -233,7 +253,7 @@ public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type)
     {
-        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, ElementWiseUnary::ROUND);
+        ElementWiseUnaryValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, false, ElementWiseUnary::ROUND);
     }
 };
 } // namespace validation
-- 
cgit v1.2.1