From cc2877368d5e15d9ea89d31c84ec651fc0fffd13 Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Thu, 19 Jan 2023 15:56:00 +0000
Subject: Change dynamic fusion API to return destination tensor info

The new dynamic fusion API is introduced in the following patch:
https://review.mlplatform.org/c/ml/ComputeLibrary/+/8906

For each operator (except Conv2D, which is migrated in the above patch), we
- remove destination tensor from is_supported, validate and create calls
- make create_op return ITensorInfo* to the intermediate destination object

Affected operators:
- DepthwiseConv2D
- Cast
- Elementwise Ops
- Clamp
- Reshape
- Resize

Resolves: COMPMID-5777
Change-Id: Ib60ec8a5f081752808455d7a7d790f2ed0627059
Signed-off-by: Gunes Bayir
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8991
Reviewed-by: Ramy Elgammal
Reviewed-by: Jakub Sujak
Dynamic-Fusion: Ramy Elgammal
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 .../internal/GpuElementwiseBinaryCommon.cpp | 119 ++++++++++-----------
 1 file changed, 58 insertions(+), 61 deletions(-)

(limited to 'src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp')

diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
index aec22e100c..7c087c9a7b 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+#include "src/common/utils/Log.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/dynamic_fusion/sketch/ArgumentPack.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
@@ -43,30 +44,23 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *lhs, c
         auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(broadcast_pair.first));
     }
 }
 
-GpuOperatorType operator_type = GpuOperatorType::Simple;
-}
-ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation)
+Status is_supported_op_helper(const GpuWorkloadContext                &context,
+                              const ITensorInfo                       *lhs,
+                              const ITensorInfo                       *rhs,
+                              const ITensorInfo                       *dst,
+                              const ElementwiseBinaryCommonAttributes &attributes)
 {
-    _operation = operation;
-    return *this;
-}
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
 
-ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const
-{
-    return _operation;
-}
+    TensorInfo         dst_info_to_validate;
+    const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
 
-Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext                &context,
-                                                   const ITensorInfo                       *lhs,
-                                                   const ITensorInfo                       *rhs,
-                                                   const ITensorInfo                       *dst,
-                                                   const ElementwiseBinaryCommonAttributes &attributes)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
+    if(dst != nullptr)
+    {
+        dst_info_to_validate_ptr = dst;
+    }
 
-    // Auto initialize dst tensor info
-    TensorInfo dst_info_to_validate = *dst;
     calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs);
 
     // Check components
@@ -80,16 +74,8 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext
             ArgumentPack<ITensorInfo> arguments;
             arguments.add_const_tensor(ACL_SRC_0, lhs);
             arguments.add_const_tensor(ACL_SRC_1, rhs);
+            arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr);
 
-            // We needed to pass the original dst pointer for in-place detection, in case its shape is not empty
-            if(dst->tensor_shape().total_size() == 0)
-            {
-                arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
-            }
-            else
-            {
-                arguments.add_const_tensor(ACL_DST_0, dst);
-            }
             ARM_COMPUTE_RETURN_ON_ERROR(ClComponentElementwiseBinary::validate(arguments, attributes));
         }
     }
@@ -101,18 +87,40 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext
     return Status{};
 }
 
+GpuOperatorType operator_type = GpuOperatorType::Simple;
+} // namespace
+
+ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation)
+{
+    _operation = operation;
+    return *this;
+}
+
+ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const
+{
+    return _operation;
+}
+
+Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext                &context,
+                                                   const ITensorInfo                       *lhs,
+                                                   const ITensorInfo                       *rhs,
+                                                   const ElementwiseBinaryCommonAttributes &attributes)
+{
+    return is_supported_op_helper(context, lhs, rhs, nullptr, attributes);
+}
+
 Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch                 &sketch,
                                                const ITensorInfo                       *lhs,
                                                const ITensorInfo                       *rhs,
-                                               const ITensorInfo                       *dst,
                                                const ElementwiseBinaryCommonAttributes &attributes)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
-    ARM_COMPUTE_RETURN_ERROR_ON(
-        !lhs->has_valid_id() || !rhs->has_valid_id() || !dst->has_valid_id());
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+    ARM_COMPUTE_RETURN_ERROR_ON(!lhs->has_valid_id() || !rhs->has_valid_id());
+
+    // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object
+    TensorInfo dst_info_to_validate;
 
     // Auto initialize dst tensor info
-    TensorInfo dst_info_to_validate = *dst;
     calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs);
 
     // Perform fusion test
@@ -125,20 +133,21 @@ Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op),
                                     "Operator fusion test failed. This operator cannot be fused into the workload");
 
-    // Check if configuration is supported, and passing the original dst for in-place detection
-    return is_supported_op(*sketch.gpu_context(), lhs, rhs, dst, attributes);
+    // Check if configuration is supported
+    return is_supported_op_helper(*sketch.gpu_context(), lhs, rhs, &dst_info_to_validate, attributes);
 }
 
-void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch                       &sketch,
-                                           ITensorInfo                             *lhs,
-                                           ITensorInfo                             *rhs,
-                                           ITensorInfo                             *dst,
-                                           const ElementwiseBinaryCommonAttributes &attributes)
+ITensorInfo *GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch                       &sketch,
+                                                   ITensorInfo                             *lhs,
+                                                   ITensorInfo                             *rhs,
+                                                   const ElementwiseBinaryCommonAttributes &attributes)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
-    const bool in_place = (lhs == dst) || (rhs == dst);
-    static TensorInfo in_place_dst;
-    in_place_dst = in_place ? sketch.create_tensor_info(*lhs) : TensorInfo{};
+    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs);
+    ARM_COMPUTE_LOG_PARAMS(lhs, rhs);
+    ARM_COMPUTE_ERROR_THROW_ON(GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, attributes));
+
+    ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
+    ARM_COMPUTE_ERROR_ON_NULLPTR(dst);
 
     // Auto initialize dst tensor
     calculate_and_init_dst_if_empty(dst, lhs, rhs);
@@ -160,14 +169,7 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch
             ArgumentPack<ITensorInfo> arguments;
             arguments.add_const_tensor(ACL_SRC_0, lhs);
             arguments.add_const_tensor(ACL_SRC_1, rhs);
-            if(in_place)
-            {
-                arguments.add_const_tensor(ACL_DST_0, &in_place_dst);
-            }
-            else
-            {
-                arguments.add_const_tensor(ACL_DST_0, dst);
-            }
+            arguments.add_const_tensor(ACL_DST_0, dst);
             comp_graph.add_new_component<ClComponentElementwiseBinary>(properties, arguments, attributes);
         }
     }
@@ -183,16 +185,11 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch
     ArgumentPack<ITensorInfo> tensors;
     tensors.add_const_tensor(ACL_SRC_0, lhs);
     tensors.add_const_tensor(ACL_SRC_1, rhs);
-    if(in_place)
-    {
-        tensors.add_const_tensor(ACL_DST_0, &in_place_dst);
-    }
-    else
-    {
-        tensors.add_tensor(ACL_DST_0, dst);
-    }
+    tensors.add_tensor(ACL_DST_0, dst);
     const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
     sketch.implementation().operator_group().add_operator(op);
+
+    return dst;
 }
 
 } // namespace dynamic_fusion
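
Usage sketch (not part of the patch above): the snippet below illustrates how caller code changes with the new create_op signature shown in this diff. The include paths, the helper function name, and the ElementwiseOp::Add enumerator are assumptions made for illustration; only the GpuElementwiseBinaryCommon and ElementwiseBinaryCommonAttributes calls themselves come from the patch, and the input tensor infos are assumed to have been registered with the sketch so they carry valid ids (which validate_op() checks via has_valid_id()).

    // Assumed include paths; GpuElementwiseBinaryCommon is an internal class normally
    // reached through the public elementwise operators listed in the commit message.
    #include "arm_compute/core/ITensorInfo.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
    #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"

    using namespace arm_compute;
    using namespace arm_compute::experimental::dynamic_fusion;

    // Hypothetical helper: fuses one elementwise Add into an existing workload sketch.
    ITensorInfo *fuse_elementwise_add(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs)
    {
        ElementwiseBinaryCommonAttributes attributes{};
        attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add);

        // Old API (before this patch): the caller supplied a destination tensor info.
        //   TensorInfo dst_info;
        //   GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, &dst_info, attributes);

        // New API (this patch): no destination argument. create_op validates the operator,
        // creates a virtual (intermediate) destination tensor owned by the sketch and
        // returns a pointer to its ITensorInfo.
        ITensorInfo *dst = GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, attributes);

        // The returned pointer can be consumed as the input of the next operator fused
        // into the same sketch.
        return dst;
    }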