1 files changed, 49 insertions, 26 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
index ffef6115d6..89b533c9b8 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,24 +41,30 @@ namespace dynamic_fusion
 {
 namespace
 {
-constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
-} // namespace
-
-Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
-                                 const ITensorInfo        *src,
-                                 const ITensorInfo        *dst,
-                                 const ClampAttributes    &attributes)
+Status is_supported_op_helper(const GpuWorkloadContext &context,
+                              const ITensorInfo        *src,
+                              const ITensorInfo        *dst,
+                              const ClampAttributes    &attributes)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value");
 
-    // Auto initialize dst tensor info
-    TensorInfo dst_info_to_validate = *dst;
+    TensorInfo         dst_info_to_validate;
+    const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
+
+    if(dst != nullptr)
+    {
+        dst_info_to_validate_ptr = dst;
+    }
+
     auto_init_if_empty(dst_info_to_validate, *src->clone());
 
     // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
-    const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() };
+    const ClComponentActivation::Attributes act_info
+    {
+        ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val()
+    };
 
     // Check components
     if(context.gpu_language() == GpuLanguage::OpenCL)
@@ -68,7 +74,7 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
 
         ArgumentPack<ITensorInfo> arguments;
         arguments.add_const_tensor(ACL_SRC, src);
-        arguments.add_const_tensor(ACL_DST, &dst_info_to_validate);
+        arguments.add_const_tensor(ACL_DST, dst_info_to_validate_ptr);
         ARM_COMPUTE_RETURN_ON_ERROR(ClComponentActivation::validate(properties, arguments, act_info));
     }
     else
@@ -78,18 +84,29 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
     return Status{};
 }
 
+constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
+} // namespace
+
+Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
+                                 const ITensorInfo        *src,
+                                 const ClampAttributes    &attributes)
+{
+    return is_supported_op_helper(context, src, nullptr, attributes);
+}
+
 Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
                              const ITensorInfo       *src,
-                             const ITensorInfo       *dst,
                              const ClampAttributes   &attributes)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
 
     // Check if tensors have valid id, i.e. they are created from a sketch
-    ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id());
+    ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
+
+    // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object
+    TensorInfo dst_info_to_validate;
 
     // Auto initialize dst tensor info
-    TensorInfo dst_info_to_validate = *dst;
     auto_init_if_empty(dst_info_to_validate, *src->clone());
 
     // Perform fusion test to check if the operator meets fusion constraints
@@ -101,18 +118,19 @@ Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
                                     "Operator fusion test failed. This operator cannot be fused into the workload");
 
     // Check if configuration is supported
-    return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
+    return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
 }
 
-void GpuClamp::create_op(GpuWorkloadSketch     &sketch,
-                         ITensorInfo           *src,
-                         ITensorInfo           *dst,
-                         const ClampAttributes &attributes)
+ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch     &sketch,
+                                 ITensorInfo           *src,
+                                 const ClampAttributes &attributes)
 {
-    // Assert validation
-    ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, dst, attributes));
-    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
-    ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
+    ARM_COMPUTE_LOG_PARAMS(src, attributes);
+    ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, attributes));
+
+    ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
+    ARM_COMPUTE_ERROR_ON_NULLPTR(dst);
 
     // Auto initialize dst tensor
     auto_init_if_empty(*dst, *src->clone());
@@ -121,7 +139,10 @@ void GpuClamp::create_op(GpuWorkloadSketch     &sketch,
     GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
 
     // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
-    const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() };
+    const ClComponentActivation::Attributes act_info
+    {
+        ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val()
+    };
 
     const auto *const sketch_ctx = sketch.implementation().context();
 
@@ -151,6 +172,8 @@ void GpuClamp::create_op(GpuWorkloadSketch     &sketch,
 
     const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
     sketch.implementation().operator_group().add_operator(op);
+
+    return dst;
 }
 
 } // namespace dynamic_fusion