From cc2877368d5e15d9ea89d31c84ec651fc0fffd13 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Thu, 19 Jan 2023 15:56:00 +0000 Subject: Change dynamic fusion API to return destination tensor info The new dynamic fusion API is introduced in the following patch: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8906 For each operator (except Conv2D, which is migrated in the above patch), we - remove destination tensor from is_supported, validate and create calls - make create_op return ITensorInfo* to the intermediate destination object Affected operators: - DepthwiseConv2D - Cast - Elementwise Ops - Clamp - Reshape - Resize Resolves: COMPMID-5777 Change-Id: Ib60ec8a5f081752808455d7a7d790f2ed0627059 Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8991 Reviewed-by: Ramy Elgammal Reviewed-by: Jakub Sujak Dynamic-Fusion: Ramy Elgammal Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp | 30 +++--- .../sketch/gpu/operators/GpuCast.cpp | 65 ++++++----- .../sketch/gpu/operators/GpuClamp.cpp | 75 ++++++++----- .../sketch/gpu/operators/GpuConv2d.cpp | 6 +- .../sketch/gpu/operators/GpuDepthwiseConv2d.cpp | 86 +++++++++------ .../sketch/gpu/operators/GpuOutput.cpp | 2 +- .../sketch/gpu/operators/GpuReshape.cpp | 67 ++++++++---- .../sketch/gpu/operators/GpuResize.cpp | 68 +++++++----- .../internal/GpuElementwiseBinaryCommon.cpp | 119 ++++++++++----------- .../internal/GpuElementwiseBinaryCommon.h | 23 ++-- 10 files changed, 318 insertions(+), 223 deletions(-) (limited to 'src/dynamic_fusion/sketch/gpu/operators') diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp index 46033d842b..a02160cba8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,6 @@ #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" -#include "src/common/utils/Log.h" - namespace arm_compute { namespace experimental @@ -36,37 +34,33 @@ namespace dynamic_fusion { Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst) + const ITensorInfo *rhs) { + // Set the elementwise operation to ADD then call the elementwise common validate_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst) + const ITensorInfo *rhs) { + // Set the elementwise operation to ADD then call the elementwise common is_supported_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } -void GpuAdd::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst) +ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuAdd::validate_op(sketch, lhs, rhs, dst)); - ARM_COMPUTE_LOG_PARAMS(lhs, rhs, dst); - + // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() // Set the elementwise operation to ADD then call the elementwise common create_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp index 3a5b64ad9c..33c2d43e07 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -38,18 +38,22 @@ namespace dynamic_fusion { namespace { -constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} -Status GpuCast::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const CastAttributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const CastAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON(src == dst); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, src->clone()->set_data_type(attributes.data_type())); // Check support level @@ -59,7 +63,7 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst_info_to_validate, + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, @@ -76,7 +80,7 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentCast::validate(properties, arguments, attributes, settings)); } } @@ -87,17 +91,27 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +Status GpuCast::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const CastAttributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const CastAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, src->clone()->set_data_type(attributes.data_type())); // Perform fusion test @@ -110,18 +124,19 @@ Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuCast::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const CastAttributes &attributes) +ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const CastAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuCast::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuCast::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor info if empty auto_init_if_empty(*dst, src->clone()->set_data_type(attributes.data_type())); @@ -160,6 +175,8 @@ void GpuCast::create_op(GpuWorkloadSketch &sketch, const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp index ffef6115d6..89b533c9b8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,24 +41,30 @@ namespace dynamic_fusion { namespace { -constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} // namespace - -Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const ClampAttributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ClampAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value"); - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, *src->clone()); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + const ClComponentActivation::Attributes act_info + { + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() + }; // Check components if(context.gpu_language() == GpuLanguage::OpenCL) @@ -68,7 +74,7 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC, src); - arguments.add_const_tensor(ACL_DST, &dst_info_to_validate); + 
arguments.add_const_tensor(ACL_DST, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentActivation::validate(properties, arguments, act_info)); } else @@ -78,18 +84,29 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ClampAttributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const ClampAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); // Check if tensors have valid id, i.e. they are created from a sketch - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, *src->clone()); // Perform fusion test to check if the operator meets fusion constraints @@ -101,18 +118,19 @@ Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuClamp::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const ClampAttributes &attributes) +ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const ClampAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor auto_init_if_empty(*dst, *src->clone()); @@ -121,7 +139,10 @@ void GpuClamp::create_op(GpuWorkloadSketch &sketch, GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph(); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + const ClComponentActivation::Attributes act_info + { + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() + }; const auto *const sketch_ctx = sketch.implementation().context(); @@ -151,6 +172,8 @@ void GpuClamp::create_op(GpuWorkloadSketch &sketch, const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp index 
7a8b97957e..690371f910 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp @@ -131,10 +131,8 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, { dst_info_to_validate_ptr = dst; } - else - { - calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); - } + + calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Check support level // Data type diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp index b08af61d8f..0f9e726604 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,20 +61,26 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, c } } -constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; -} // namespace - -Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *wei, - const ITensorInfo *bia, - const ITensorInfo *dst, - const DepthwiseConv2dAttributes &attributes) +/* A helper method to reduce the duplication in dst tensor initialization +* when calling validate() +*/ +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *wei, + const ITensorInfo *bia, + const ITensorInfo *dst, + const DepthwiseConv2dAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Check support level @@ -100,12 +106,12 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.fast_relaxed_math( (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) - && (dst_info_to_validate.data_type() == DataType::F32 || dst_info_to_validate.data_type() == DataType::F16)); + && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16)); settings.is_fma_available(get_arch_from_target(gpu_target) == GPUTarget::MIDGARD) .m0(dwc_info.m0) @@ -117,7 +123,7 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont arguments.add_const_tensor(ACL_SRC_0, src); arguments.add_const_tensor(ACL_SRC_1, wei); arguments.add_const_tensor(ACL_SRC_2, bia); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); 
ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings)); } } @@ -129,23 +135,36 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *wei, + const ITensorInfo *bia, + const DepthwiseConv2dAttributes &attributes) +{ + return is_supported_op_helper(context, src, wei, bia, nullptr, attributes); +} + Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, - const ITensorInfo *dst, const DepthwiseConv2dAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id()); if(bia != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id()); } + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; + // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Perform fusion test @@ -161,20 +180,21 @@ Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes); } -void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *wei, - ITensorInfo *bia, - ITensorInfo *dst, - const DepthwiseConv2dAttributes &attributes) +ITensorInfo *GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + ITensorInfo *wei, + ITensorInfo *bia, + const DepthwiseConv2dAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst); - ARM_COMPUTE_LOG_PARAMS(src, wei, bia, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei); + ARM_COMPUTE_LOG_PARAMS(src, wei, bia, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); calculate_and_init_dst_if_empty(dst, src, wei, attributes); @@ -197,7 +217,7 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) @@ -241,6 +261,8 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, 
const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp index c906da8199..107a5e5fa7 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp @@ -41,7 +41,7 @@ namespace dynamic_fusion namespace { constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} +} // namespace Status GpuOutput::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp index 7a4063d554..f5645f325f 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp @@ -37,15 +37,21 @@ namespace dynamic_fusion { namespace { -GpuOperatorType operator_type = GpuOperatorType::Complex; -} - -Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const Attributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ReshapeAttributes &attributes) { - TensorInfo dst_info_to_validate = *dst; + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape())); // Check components @@ -57,7 +63,7 @@ Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, // Validate GpuReshape Component ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentReshape::validate(arguments)); } @@ -68,16 +74,28 @@ Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, return Status{}; } + +GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const Attributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape())); // Perform fusion test @@ -90,17 +108,20 @@ Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuReshape::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes) +ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Attributes &attributes) { - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes.shape()); - ARM_COMPUTE_ERROR_THROW_ON(GpuReshape::validate_op(sketch, src, dst, attributes)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes.shape()); + ARM_COMPUTE_ERROR_THROW_ON(GpuReshape::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); + auto_init_if_empty(*dst, src->clone()->set_tensor_shape(attributes.shape())); // Translate into components and add to component graph @@ -136,7 +157,9 @@ void GpuReshape::create_op(GpuWorkloadSketch &sketch, const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion } // namespace experimental -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp index aa45f4c1a5..5f52eea7d0 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,17 +54,21 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, c } } -constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; -} -Status GpuResize::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const Attributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ResizeAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes); // Check support level @@ -88,7 +92,7 @@ Status GpuResize::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentResize::validate(properties, arguments, attributes)); } } @@ -100,16 +104,27 @@ Status GpuResize::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuResize::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const Attributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const GpuResize::Attributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); - // Auto initialize dst tensor info if empty - TensorInfo dst_info_to_validate = *dst; + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; + + // Auto initialize dst tensor info calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes); // Perform fusion test @@ -123,18 +138,19 @@ Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuResize::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const GpuResize::Attributes &attributes) +ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const GpuResize::Attributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuResize::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuResize::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor info if empty calculate_and_init_dst_if_empty(dst, src, attributes); @@ -172,6 +188,8 @@ void GpuResize::create_op(GpuWorkloadSketch &sketch, const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp index aec22e100c..7c087c9a7b 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" +#include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" @@ -43,30 +44,23 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *lhs, c auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(broadcast_pair.first)); } } -GpuOperatorType operator_type = GpuOperatorType::Simple; -} -ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ITensorInfo *dst, + const ElementwiseBinaryCommonAttributes &attributes) { - _operation = operation; - return *this; -} + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); -ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const -{ - return _operation; -} + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; -Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs); // Check components @@ -80,16 +74,8 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, lhs); arguments.add_const_tensor(ACL_SRC_1, rhs); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); - // We needed to pass the original dst pointer for in-place detection, in case its shape is not empty - if(dst->tensor_shape().total_size() == 0) - { - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); - } - else - { - arguments.add_const_tensor(ACL_DST_0, dst); - } ARM_COMPUTE_RETURN_ON_ERROR(ClComponentElementwiseBinary::validate(arguments, attributes)); } } @@ -101,18 +87,40 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext return Status{}; } +GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) +{ + _operation = operation; + return *this; +} + +ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const +{ + return _operation; +} + +Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes) +{ + return is_supported_op_helper(context, lhs, rhs, nullptr, attributes); +} + Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); - ARM_COMPUTE_RETURN_ERROR_ON( - !lhs->has_valid_id() || !rhs->has_valid_id() || !dst->has_valid_id()); + 
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON(!lhs->has_valid_id() || !rhs->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs); // Perform fusion test @@ -125,20 +133,21 @@ Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op), "Operator fusion test failed. This operator cannot be fused into the workload"); - // Check if configuration is supported, and passing the original dst for in-place detection - return is_supported_op(*sketch.gpu_context(), lhs, rhs, dst, attributes); + // Check if configuration is supported + return is_supported_op_helper(*sketch.gpu_context(), lhs, rhs, &dst_info_to_validate, attributes); } -void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes) +ITensorInfo *GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst); - const bool in_place = (lhs == dst) || (rhs == dst); - static TensorInfo in_place_dst; - in_place_dst = in_place ? sketch.create_tensor_info(*lhs) : TensorInfo{}; + ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_LOG_PARAMS(lhs, rhs); + ARM_COMPUTE_ERROR_THROW_ON(GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor calculate_and_init_dst_if_empty(dst, lhs, rhs); @@ -160,14 +169,7 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, lhs); arguments.add_const_tensor(ACL_SRC_1, rhs); - if(in_place) - { - arguments.add_const_tensor(ACL_DST_0, &in_place_dst); - } - else - { - arguments.add_const_tensor(ACL_DST_0, dst); - } + arguments.add_const_tensor(ACL_DST_0, dst); comp_graph.add_new_component(properties, arguments, attributes); } } @@ -183,16 +185,11 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch ArgumentPack tensors; tensors.add_const_tensor(ACL_SRC_0, lhs); tensors.add_const_tensor(ACL_SRC_1, rhs); - if(in_place) - { - tensors.add_const_tensor(ACL_DST_0, &in_place_dst); - } - else - { - tensors.add_tensor(ACL_DST_0, dst); - } + tensors.add_tensor(ACL_DST_0, dst); const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h index b00d069389..cbefa379e6 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h @@ -25,6 +25,7 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON #include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorInfo.h" namespace arm_compute { @@ -76,34 +77,36 @@ public: * 
@param[in,out] sketch Workload sketch into which the operator will be fused * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. * @param[in] rhs Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32. - * @param[out] dst Destination tensor info. Data types supported: U8/S16/S32/F16/F32. If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes ElementwiseBinaryCommonAttributes containing the operator type: ADD, SUB, DIV, ... etc. + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion * * @param[in] context Workload context within which the operator is running * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. * @param[in] rhs Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: U8/S16/S32/F16/F32. If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes ElementwiseBinaryCommonAttributes containing the operator type: ADD, SUB, DIV, ... etc. + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes); /** Validate the operator and check if it can be fused into the workload sketch. - * Similar to @ref GpuElementwiseBinaryCommon::create_op() + * + * Parameters are similar to @ref GpuElementwiseBinaryCommon::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *rhs, const ITensorInfo *lhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes); }; } // namespace dynamic_fusion -- cgit v1.2.1
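Editor's note (not part of the patch): the caller-facing effect of this change, for the migrated operators, is that the destination tensor info is no longer passed into `is_supported_op()`, `validate_op()` or `create_op()`; instead `create_op()` creates the destination internally as a virtual tensor of the sketch and returns a pointer to it. The sketch below is a minimal, hedged illustration of how a caller chains two of the migrated operators after this patch; the header paths and surrounding setup are assumptions based on the ComputeLibrary tree at the time of this commit, and `src_info`, `cast_attr` and `clamp_attr` are hypothetical caller-side names.

```cpp
// Illustrative caller-side sketch only (not part of the patch).
// Assumed header paths; adjust to the actual ComputeLibrary layout if they differ.
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

// src_info must have been created from the sketch so that it has a valid id.
ITensorInfo *fuse_cast_then_clamp(GpuWorkloadSketch     &sketch,
                                  ITensorInfo           *src_info,
                                  const CastAttributes  &cast_attr,
                                  const ClampAttributes &clamp_attr)
{
    // Before this patch the caller owned the intermediate destination, e.g.:
    //   TensorInfo cast_dst = sketch.create_tensor_info();
    //   GpuCast::create_op(sketch, src_info, &cast_dst, cast_attr);
    //   GpuClamp::create_op(sketch, &cast_dst, &clamp_dst, clamp_attr);

    // After this patch each create_op() returns the intermediate destination,
    // created internally via the sketch's virtual-tensor mechanism, so the
    // operators can be chained directly.
    ITensorInfo *cast_dst  = GpuCast::create_op(sketch, src_info, cast_attr);
    ITensorInfo *clamp_dst = GpuClamp::create_op(sketch, cast_dst, clamp_attr);
    return clamp_dst; // would typically be written out through a GpuOutput sink
}
```

The returned pointer refers to a sketch-owned virtual tensor (see the `sketch.implementation().create_virtual_tensor()` calls added in this patch), so it only exists inside the workload; materializing a real output still goes through GpuOutput, whose signature is unchanged here.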