From 1d359279e22874121def2ce4bfdb633d94ea5ade Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Thu, 10 Jun 2021 13:56:11 +0100 Subject: Add in-place computation for elementwise operations - Add in-place computation for elementwise operations at graph level - Modify support case to test in-place computation for elementwise operations Resolves: COMPMID-4414 Signed-off-by: Sheri Zhang Change-Id: I5a4de1235dd29a31160e770a16d62f4b98c84ae6 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5803 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: SiCong Li Tested-by: Arm Jenkins --- arm_compute/graph/Types.h | 1 + src/graph/mutators/InPlaceOperationMutator.cpp | 93 ++++++++++++++++++---- .../fixtures/ElementwiseOperationsFixture.h | 84 ++++++++++++------- 3 files changed, 132 insertions(+), 46 deletions(-) diff --git a/arm_compute/graph/Types.h b/arm_compute/graph/Types.h index 7306b82a1e..63a9433fe6 100644 --- a/arm_compute/graph/Types.h +++ b/arm_compute/graph/Types.h @@ -112,6 +112,7 @@ enum class EltwiseOperation Mul, /**< Arithmetic multiplication */ Max, /**< Arithmetic maximum */ Div, /**< Arithmetic division */ + Min, /**< Arithmetic minimum */ }; /** Supported Unary Element-wise operations */ diff --git a/src/graph/mutators/InPlaceOperationMutator.cpp b/src/graph/mutators/InPlaceOperationMutator.cpp index 61639a8f6f..616ec5c73d 100644 --- a/src/graph/mutators/InPlaceOperationMutator.cpp +++ b/src/graph/mutators/InPlaceOperationMutator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/graph/mutators/InPlaceOperationMutator.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/graph/Graph.h" #include "arm_compute/graph/Logger.h" @@ -69,6 +70,64 @@ bool output_edges_are_separate_tensors(Graph &g, const Edge *input_edge) return edge->tensor() != input_tensor; }); } + +// If do in-place calculation, then need to use the new output and inherit original output's accessor +void set_new_output_and_inherit_accessor(std::unique_ptr &node, Tensor *orig_output, Tensor *new_output) +{ + ARM_COMPUTE_LOG_GRAPH_INFO("Switching to in-place computation for the node with ID : " + << node->id() << " and name : " << node->name() << std::endl); + // Update accessor + new_output->set_accessor(orig_output->extract_accessor()); + // Update output + node->set_output_tensor(new_output->id(), 0); +} + +// Try to mutate the node to perform the elementwise in-place calculation +void try_in_place_elementwise(std::unique_ptr &node) +{ + // Get input edge + Edge *input0_edge = node->input_edge(0); + Edge *input1_edge = node->input_edge(1); + ARM_COMPUTE_ERROR_ON(input0_edge == nullptr || input1_edge == nullptr); + + auto input0_tensor = input0_edge->tensor(); + auto input1_tensor = input1_edge->tensor(); + ARM_COMPUTE_ERROR_ON(input0_tensor == nullptr || input1_tensor == nullptr); + + const auto shape0 = input0_tensor->desc().shape; + const auto shape1 = input1_tensor->desc().shape; + const auto qinfo0 = input0_tensor->desc().quant_info; + const auto qinfo1 = input1_tensor->desc().quant_info; + + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + // Inputs are not broadcast compatible + if(out_shape.total_size() == 0) + { + return; + } + + // Get current output tensor + auto current_output_tensor = node->output(0); + ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr); + const auto qinfo_out = current_output_tensor->desc().quant_info; + + // Can do in place, if 
the input has same shape as output, has same quantisation info as output, and input doesn't have accessor. + bool input0_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (input0_tensor->accessor() == nullptr); + bool input1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (input1_tensor->accessor() == nullptr); + + if(input0_can_in_place) + { + set_new_output_and_inherit_accessor(node, current_output_tensor, input0_tensor); + } + else if(input1_can_in_place) + { + set_new_output_and_inherit_accessor(node, current_output_tensor, input1_tensor); + } + else + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + } +} } // namespace const char *InPlaceOperationMutator::name() @@ -103,25 +162,27 @@ void InPlaceOperationMutator::mutate(Graph &g) // Check if parent has a single output if yes then force in place calculation else not if((input_edge != nullptr) && output_edges_are_separate_tensors(g, input_edge)) { - // Get current and new output tensors - auto current_output_tensor = node->output(0); - auto new_output_tensor = input_edge->tensor(); - - ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr); - - // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different - if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) + if(node->type() == NodeType::EltwiseLayer) { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + try_in_place_elementwise(node); } else { - ARM_COMPUTE_LOG_GRAPH_VERBOSE("Switching to in-place computation for the node with ID : " - << node->id() << " and 
name : " << node->name() << std::endl); - // Update accessor - new_output_tensor->set_accessor(current_output_tensor->extract_accessor()); - // Update output - node->set_output_tensor(new_output_tensor->id(), 0); + // Get current and new output tensors + auto current_output_tensor = node->output(0); + auto new_output_tensor = input_edge->tensor(); + + ARM_COMPUTE_ERROR_ON(current_output_tensor == nullptr || new_output_tensor == nullptr); + + // Prevent in-place operation if there is an accessor bound to the in-place tensor or quantization info are different + if(new_output_tensor->accessor() != nullptr || current_output_tensor->desc().quant_info != new_output_tensor->desc().quant_info) + { + ARM_COMPUTE_LOG_GRAPH_VERBOSE("Prevented in-place operation as there is an accessor bound to the input tensor or the quantization info are different.\n"); + } + else + { + set_new_output_and_inherit_accessor(node, current_output_tensor, new_output_tensor); + } } } } diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h index 352720c03b..6661862342 100644 --- a/tests/validation/fixtures/ElementwiseOperationsFixture.h +++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -48,12 +49,12 @@ public: template void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool use_dyanmic_shape = false) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place = false, bool use_dynamic_shape = false) { _op = op; - _use_dynamic_shape = use_dyanmic_shape; + 
_use_dynamic_shape = use_dynamic_shape; - _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); + _target = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out, in_place); _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out); } @@ -82,12 +83,30 @@ protected: } TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place = false) { // Create tensors - TensorType ref_src1 = create_tensor(shape0, data_type0, 1, qinfo0); - TensorType ref_src2 = create_tensor(shape1, data_type1, 1, qinfo1); - TensorType dst = create_tensor(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out); + const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); + TensorType ref_src1 = create_tensor(shape0, data_type0, 1, qinfo0); + TensorType ref_src2 = create_tensor(shape1, data_type1, 1, qinfo1); + TensorType dst = create_tensor(out_shape, output_data_type, 1, qinfo_out); + + // Check whether do in-place computation and whether inputs are broadcast compatible + TensorType *actual_dst = &dst; + bool src1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out); + bool src2_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out); + bool do_in_place = in_place && out_shape.total_size() != 0 && (src1_can_in_place || src2_can_in_place); + if(do_in_place) + { + if(src1_can_in_place) + { + actual_dst = &ref_src1; + } + else if(src2_can_in_place) + { + actual_dst = &ref_src2; + } + } // if _use_dynamic_shape is true, this fixture will 
test scenario for dynamic shapes. // - At configure time, all input tensors are marked as dynamic using set_tensor_dynamic() @@ -101,7 +120,7 @@ protected: // Create and configure function FunctionType elem_op; - elem_op.configure(&ref_src1, &ref_src2, &dst); + elem_op.configure(&ref_src1, &ref_src2, actual_dst); if(_use_dynamic_shape) { @@ -111,16 +130,21 @@ protected: ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable()); ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable()); - ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); // Allocate tensors ref_src1.allocator()->allocate(); ref_src2.allocator()->allocate(); - dst.allocator()->allocate(); + + // If in-place computation is not supported, still need to allocate original dst + if(!do_in_place) + { + ARM_COMPUTE_ASSERT(dst.info()->is_resizable()); + dst.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); + } ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable()); ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors fill(AccessorType(ref_src1), 0); @@ -129,7 +153,7 @@ protected: // Compute function elem_op.run(); - return dst; + return std::move(*actual_dst); } SimpleTensor compute_reference(const TensorShape &shape0, const TensorShape &shape1, @@ -162,11 +186,11 @@ public: template void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, - QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info) + QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool in_place = false) { ArithmeticOperationsGenericFixture::setup(op, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, in_place); _act_info = act_info; } @@ -227,7 +251,7 @@ public: { 
ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -253,7 +277,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true, true); } }; @@ -279,7 +303,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::DIV, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -333,7 +357,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -359,7 +383,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -402,7 +426,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MAX, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -415,7 +439,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), 
QuantizationInfo(), true); } }; @@ -441,7 +465,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -484,7 +508,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::MIN, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -497,7 +521,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -523,7 +547,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; @@ -566,7 +590,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -579,7 +603,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::PRELU, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -622,7 +646,7 @@ public: { ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::PRELU, shape0, shape1, data_type0, data_type1, output_data_type, - qinfo0, qinfo1, qinfo_out); + qinfo0, qinfo1, qinfo_out, true); } }; @@ -635,7 +659,7 @@ public: { 
ArithmeticOperationsGenericFixture::setup(ArithmeticOperation::POWER, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo()); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true); } }; @@ -661,7 +685,7 @@ public: { ArithmeticOperationsFuseActivationFixture::setup(ArithmeticOperation::POWER, shape0, shape1, data_type0, data_type1, output_data_type, - QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info); + QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true); } }; -- cgit v1.2.1