From cc2877368d5e15d9ea89d31c84ec651fc0fffd13 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Thu, 19 Jan 2023 15:56:00 +0000 Subject: Change dynamic fusion API to return destination tensor info The new dynamic fusion API is introduced in the following patch: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8906 For each operator (except Conv2D, which is migrated in the above patch), we - remove destination tensor from is_supported, validate and create calls - make create_op return ITensorInfo* to the intermediate destination object Affected operators: - DepthwiseConv2D - Cast - Elementwise Ops - Clamp - Reshape - Resize Resolves: COMPMID-5777 Change-Id: Ib60ec8a5f081752808455d7a7d790f2ed0627059 Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8991 Reviewed-by: Ramy Elgammal Reviewed-by: Jakub Sujak Dynamic-Fusion: Ramy Elgammal Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp | 30 +++--- .../sketch/gpu/operators/GpuCast.cpp | 65 ++++++----- .../sketch/gpu/operators/GpuClamp.cpp | 75 ++++++++----- .../sketch/gpu/operators/GpuConv2d.cpp | 6 +- .../sketch/gpu/operators/GpuDepthwiseConv2d.cpp | 86 +++++++++------ .../sketch/gpu/operators/GpuOutput.cpp | 2 +- .../sketch/gpu/operators/GpuReshape.cpp | 67 ++++++++---- .../sketch/gpu/operators/GpuResize.cpp | 68 +++++++----- .../internal/GpuElementwiseBinaryCommon.cpp | 119 ++++++++++----------- .../internal/GpuElementwiseBinaryCommon.h | 23 ++-- 10 files changed, 318 insertions(+), 223 deletions(-) (limited to 'src/dynamic_fusion/sketch/gpu/operators') diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp index 46033d842b..a02160cba8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,6 @@ #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" -#include "src/common/utils/Log.h" - namespace arm_compute { namespace experimental @@ -36,37 +34,33 @@ namespace dynamic_fusion { Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst) + const ITensorInfo *rhs) { + // Set the elementwise operation to ADD then call the elementwise common validate_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst) + const ITensorInfo *rhs) { + // Set the elementwise operation to ADD then call the elementwise common is_supported_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } -void GpuAdd::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst) +ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuAdd::validate_op(sketch, lhs, rhs, dst)); - ARM_COMPUTE_LOG_PARAMS(lhs, rhs, dst); - + // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() // Set the elementwise operation to ADD then call the elementwise common create_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp index 3a5b64ad9c..33c2d43e07 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -38,18 +38,22 @@ namespace dynamic_fusion { namespace { -constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} -Status GpuCast::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const CastAttributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const CastAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON(src == dst); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, src->clone()->set_data_type(attributes.data_type())); // Check support level @@ -59,7 +63,7 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst_info_to_validate, + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, @@ -76,7 +80,7 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentCast::validate(properties, arguments, attributes, settings)); } } @@ -87,17 +91,27 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +Status GpuCast::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const CastAttributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const CastAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, src->clone()->set_data_type(attributes.data_type())); // Perform fusion test @@ -110,18 +124,19 @@ Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuCast::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const CastAttributes &attributes) +ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const CastAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuCast::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuCast::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor info if empty auto_init_if_empty(*dst, src->clone()->set_data_type(attributes.data_type())); @@ -160,6 +175,8 @@ void GpuCast::create_op(GpuWorkloadSketch &sketch, const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp index ffef6115d6..89b533c9b8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,24 +41,30 @@ namespace dynamic_fusion { namespace { -constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} // namespace - -Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const ClampAttributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ClampAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value"); - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, *src->clone()); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + const ClComponentActivation::Attributes act_info + { + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() + }; // Check components if(context.gpu_language() == GpuLanguage::OpenCL) @@ -68,7 +74,7 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC, src); - arguments.add_const_tensor(ACL_DST, &dst_info_to_validate); + 
arguments.add_const_tensor(ACL_DST, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentActivation::validate(properties, arguments, act_info)); } else @@ -78,18 +84,29 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ClampAttributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const ClampAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); // Check if tensors have valid id, i.e. they are created from a sketch - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, *src->clone()); // Perform fusion test to check if the operator meets fusion constraints @@ -101,18 +118,19 @@ Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuClamp::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const ClampAttributes &attributes) +ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const ClampAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor auto_init_if_empty(*dst, *src->clone()); @@ -121,7 +139,10 @@ void GpuClamp::create_op(GpuWorkloadSketch &sketch, GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph(); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + const ClComponentActivation::Attributes act_info + { + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() + }; const auto *const sketch_ctx = sketch.implementation().context(); @@ -151,6 +172,8 @@ void GpuClamp::create_op(GpuWorkloadSketch &sketch, const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp index 
7a8b97957e..690371f910 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp @@ -131,10 +131,8 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, { dst_info_to_validate_ptr = dst; } - else - { - calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); - } + + calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Check support level // Data type diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp index b08af61d8f..0f9e726604 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,20 +61,26 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, c } } -constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; -} // namespace - -Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *wei, - const ITensorInfo *bia, - const ITensorInfo *dst, - const DepthwiseConv2dAttributes &attributes) +/* A helper method to reduce the duplication in dst tensor initialization +* when calling validate() +*/ +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *wei, + const ITensorInfo *bia, + const ITensorInfo *dst, + const DepthwiseConv2dAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Check support level @@ -100,12 +106,12 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.fast_relaxed_math( (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) - && (dst_info_to_validate.data_type() == DataType::F32 || dst_info_to_validate.data_type() == DataType::F16)); + && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16)); settings.is_fma_available(get_arch_from_target(gpu_target) == GPUTarget::MIDGARD) .m0(dwc_info.m0) @@ -117,7 +123,7 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont arguments.add_const_tensor(ACL_SRC_0, src); arguments.add_const_tensor(ACL_SRC_1, wei); arguments.add_const_tensor(ACL_SRC_2, bia); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); 
ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings)); } } @@ -129,23 +135,36 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *wei, + const ITensorInfo *bia, + const DepthwiseConv2dAttributes &attributes) +{ + return is_supported_op_helper(context, src, wei, bia, nullptr, attributes); +} + Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, - const ITensorInfo *dst, const DepthwiseConv2dAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id()); if(bia != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id()); } + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; + // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Perform fusion test @@ -161,20 +180,21 @@ Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes); } -void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *wei, - ITensorInfo *bia, - ITensorInfo *dst, - const DepthwiseConv2dAttributes &attributes) +ITensorInfo *GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + ITensorInfo *wei, + ITensorInfo *bia, + const DepthwiseConv2dAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst); - ARM_COMPUTE_LOG_PARAMS(src, wei, bia, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei); + ARM_COMPUTE_LOG_PARAMS(src, wei, bia, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); calculate_and_init_dst_if_empty(dst, src, wei, attributes); @@ -197,7 +217,7 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) @@ -241,6 +261,8 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, 
const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp index c906da8199..107a5e5fa7 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp @@ -41,7 +41,7 @@ namespace dynamic_fusion namespace { constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} +} // namespace Status GpuOutput::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp index 7a4063d554..f5645f325f 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp @@ -37,15 +37,21 @@ namespace dynamic_fusion { namespace { -GpuOperatorType operator_type = GpuOperatorType::Complex; -} - -Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const Attributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ReshapeAttributes &attributes) { - TensorInfo dst_info_to_validate = *dst; + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape())); // Check components @@ -57,7 +63,7 @@ Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, // Validate GpuReshape Component ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentReshape::validate(arguments)); } @@ -68,16 +74,28 @@ Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, return Status{}; } + +GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const Attributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape())); // Perform fusion test @@ -90,17 +108,20 @@ Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuReshape::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes) +ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Attributes &attributes) { - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes.shape()); - ARM_COMPUTE_ERROR_THROW_ON(GpuReshape::validate_op(sketch, src, dst, attributes)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes.shape()); + ARM_COMPUTE_ERROR_THROW_ON(GpuReshape::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); + auto_init_if_empty(*dst, src->clone()->set_tensor_shape(attributes.shape())); // Translate into components and add to component graph @@ -136,7 +157,9 @@ void GpuReshape::create_op(GpuWorkloadSketch &sketch, const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion } // namespace experimental -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp index aa45f4c1a5..5f52eea7d0 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,17 +54,21 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, c } } -constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; -} -Status GpuResize::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const Attributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ResizeAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes); // Check support level @@ -88,7 +92,7 @@ Status GpuResize::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentResize::validate(properties, arguments, attributes)); } } @@ -100,16 +104,27 @@ Status GpuResize::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuResize::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const Attributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const GpuResize::Attributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); - // Auto initialize dst tensor info if empty - TensorInfo dst_info_to_validate = *dst; + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; + + // Auto initialize dst tensor info calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes); // Perform fusion test @@ -123,18 +138,19 @@ Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuResize::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const GpuResize::Attributes &attributes) +ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const GpuResize::Attributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuResize::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuResize::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor info if empty calculate_and_init_dst_if_empty(dst, src, attributes); @@ -172,6 +188,8 @@ void GpuResize::create_op(GpuWorkloadSketch &sketch, const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp index aec22e100c..7c087c9a7b 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" +#include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" @@ -43,30 +44,23 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *lhs, c auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(broadcast_pair.first)); } } -GpuOperatorType operator_type = GpuOperatorType::Simple; -} -ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ITensorInfo *dst, + const ElementwiseBinaryCommonAttributes &attributes) { - _operation = operation; - return *this; -} + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); -ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const -{ - return _operation; -} + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; -Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs); // Check components @@ -80,16 +74,8 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, lhs); arguments.add_const_tensor(ACL_SRC_1, rhs); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); - // We needed to pass the original dst pointer for in-place detection, in case its shape is not empty - if(dst->tensor_shape().total_size() == 0) - { - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); - } - else - { - arguments.add_const_tensor(ACL_DST_0, dst); - } ARM_COMPUTE_RETURN_ON_ERROR(ClComponentElementwiseBinary::validate(arguments, attributes)); } } @@ -101,18 +87,40 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext return Status{}; } +GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) +{ + _operation = operation; + return *this; +} + +ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const +{ + return _operation; +} + +Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes) +{ + return is_supported_op_helper(context, lhs, rhs, nullptr, attributes); +} + Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); - ARM_COMPUTE_RETURN_ERROR_ON( - !lhs->has_valid_id() || !rhs->has_valid_id() || !dst->has_valid_id()); + 
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON(!lhs->has_valid_id() || !rhs->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs); // Perform fusion test @@ -125,20 +133,21 @@ Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op), "Operator fusion test failed. This operator cannot be fused into the workload"); - // Check if configuration is supported, and passing the original dst for in-place detection - return is_supported_op(*sketch.gpu_context(), lhs, rhs, dst, attributes); + // Check if configuration is supported + return is_supported_op_helper(*sketch.gpu_context(), lhs, rhs, &dst_info_to_validate, attributes); } -void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes) +ITensorInfo *GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst); - const bool in_place = (lhs == dst) || (rhs == dst); - static TensorInfo in_place_dst; - in_place_dst = in_place ? sketch.create_tensor_info(*lhs) : TensorInfo{}; + ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_LOG_PARAMS(lhs, rhs); + ARM_COMPUTE_ERROR_THROW_ON(GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor calculate_and_init_dst_if_empty(dst, lhs, rhs); @@ -160,14 +169,7 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, lhs); arguments.add_const_tensor(ACL_SRC_1, rhs); - if(in_place) - { - arguments.add_const_tensor(ACL_DST_0, &in_place_dst); - } - else - { - arguments.add_const_tensor(ACL_DST_0, dst); - } + arguments.add_const_tensor(ACL_DST_0, dst); comp_graph.add_new_component(properties, arguments, attributes); } } @@ -183,16 +185,11 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch ArgumentPack tensors; tensors.add_const_tensor(ACL_SRC_0, lhs); tensors.add_const_tensor(ACL_SRC_1, rhs); - if(in_place) - { - tensors.add_const_tensor(ACL_DST_0, &in_place_dst); - } - else - { - tensors.add_tensor(ACL_DST_0, dst); - } + tensors.add_tensor(ACL_DST_0, dst); const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h index b00d069389..cbefa379e6 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h @@ -25,6 +25,7 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON #include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorInfo.h" namespace arm_compute { @@ -76,34 +77,36 @@ public: * 
@param[in,out] sketch Workload sketch into which the operator will be fused * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. * @param[in] rhs Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32. - * @param[out] dst Destination tensor info. Data types supported: U8/S16/S32/F16/F32. If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes ElementwiseBinaryCommonAttributes containing the operator type: ADD, SUB, DIV, ... etc. + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion * * @param[in] context Workload context within which the operator is running * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. * @param[in] rhs Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: U8/S16/S32/F16/F32. If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes ElementwiseBinaryCommonAttributes containing the operator type: ADD, SUB, DIV, ... etc. + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes); /** Validate the operator and check if it can be fused into the workload sketch. - * Similar to @ref GpuElementwiseBinaryCommon::create_op() + * + * Parameters are similar to @ref GpuElementwiseBinaryCommon::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *rhs, const ITensorInfo *lhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes); }; } // namespace dynamic_fusion -- cgit v1.2.1
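Editor's note (not part of the patch): the caller-facing effect of this change, for the migrated operators, is that the destination tensor info is no longer passed into `is_supported_op()`, `validate_op()` or `create_op()`; instead `create_op()` creates the destination internally as a virtual tensor of the sketch and returns a pointer to it. The sketch below is a minimal, hedged illustration of how a caller chains two of the migrated operators after this patch; the header paths and surrounding setup are assumptions based on the ComputeLibrary tree at the time of this commit, and `src_info`, `cast_attr` and `clamp_attr` are hypothetical caller-side names.

```cpp
// Illustrative caller-side sketch only (not part of the patch).
// Assumed header paths; adjust to the actual ComputeLibrary layout if they differ.
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

// src_info must have been created from the sketch so that it has a valid id.
ITensorInfo *fuse_cast_then_clamp(GpuWorkloadSketch     &sketch,
                                  ITensorInfo           *src_info,
                                  const CastAttributes  &cast_attr,
                                  const ClampAttributes &clamp_attr)
{
    // Before this patch the caller owned the intermediate destination, e.g.:
    //   TensorInfo cast_dst = sketch.create_tensor_info();
    //   GpuCast::create_op(sketch, src_info, &cast_dst, cast_attr);
    //   GpuClamp::create_op(sketch, &cast_dst, &clamp_dst, clamp_attr);

    // After this patch each create_op() returns the intermediate destination,
    // created internally via the sketch's virtual-tensor mechanism, so the
    // operators can be chained directly.
    ITensorInfo *cast_dst  = GpuCast::create_op(sketch, src_info, cast_attr);
    ITensorInfo *clamp_dst = GpuClamp::create_op(sketch, cast_dst, clamp_attr);
    return clamp_dst; // would typically be written out through a GpuOutput sink
}
```

The returned pointer refers to a sketch-owned virtual tensor (see the `sketch.implementation().create_virtual_tensor()` calls added in this patch), so it only exists inside the workload; materializing a real output still goes through GpuOutput, whose signature is unchanged here.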