From cc2877368d5e15d9ea89d31c84ec651fc0fffd13 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Thu, 19 Jan 2023 15:56:00 +0000 Subject: Change dynamic fusion API to return destination tensor info The new dynamic fusion API is introduced in the following patch: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8906 For each operator (except Conv2D, which is migrated in the above patch), we - remove destination tensor from is_supported, validate and create calls - make create_op return ITensorInfo* to the intermediate destination object Affected operators: - DepthwiseConv2D - Cast - Elementwise Ops - Clamp - Reshape - Resize Resolves: COMPMID-5777 Change-Id: Ib60ec8a5f081752808455d7a7d790f2ed0627059 Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8991 Reviewed-by: Ramy Elgammal Reviewed-by: Jakub Sujak Dynamic-Fusion: Ramy Elgammal Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- .../dynamic_fusion/sketch/gpu/operators/GpuAdd.h | 24 +++-- .../dynamic_fusion/sketch/gpu/operators/GpuCast.h | 30 +++--- .../dynamic_fusion/sketch/gpu/operators/GpuClamp.h | 24 ++--- .../sketch/gpu/operators/GpuConv2d.h | 9 +- .../sketch/gpu/operators/GpuDepthwiseConv2d.h | 32 +++--- .../sketch/gpu/operators/GpuOutput.h | 9 +- .../sketch/gpu/operators/GpuReshape.h | 20 ++-- .../sketch/gpu/operators/GpuResize.h | 30 +++--- .../components/cl/ClComponentElementwiseBinary.cpp | 5 - src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp | 30 +++--- .../sketch/gpu/operators/GpuCast.cpp | 65 ++++++----- .../sketch/gpu/operators/GpuClamp.cpp | 75 ++++++++----- .../sketch/gpu/operators/GpuConv2d.cpp | 6 +- .../sketch/gpu/operators/GpuDepthwiseConv2d.cpp | 86 +++++++++------ .../sketch/gpu/operators/GpuOutput.cpp | 2 +- .../sketch/gpu/operators/GpuReshape.cpp | 67 ++++++++---- .../sketch/gpu/operators/GpuResize.cpp | 68 +++++++----- .../internal/GpuElementwiseBinaryCommon.cpp | 119 ++++++++++----------- .../internal/GpuElementwiseBinaryCommon.h | 23 ++-- .../validation/dynamic_fusion/gpu/Integration.cpp | 48 ++++----- tests/validation/dynamic_fusion/gpu/cl/Add.cpp | 59 +--------- tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp | 23 +--- .../dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp | 93 +++++----------- tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp | 23 ++-- tests/validation/dynamic_fusion/gpu/cl/Resize.cpp | 48 ++++----- .../dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h | 16 ++- .../gpu/cl/ElementwiseBinaryFixture.h | 23 ++-- .../dynamic_fusion/operators/CastFixture.h | 8 +- .../dynamic_fusion/operators/ClampFixture.h | 20 ++-- .../dynamic_fusion/operators/ReshapeFixture.h | 10 +- .../dynamic_fusion/operators/ResizeFixture.h | 8 +- 31 files changed, 533 insertions(+), 570 deletions(-) diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h index a2ee7f7dc1..6ac5d4e500 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h @@ -60,30 +60,32 @@ public: * @param[in,out] sketch Workload sketch into which the operator will be fused * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. * @param[in] rhs Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32. - * @param[out] dst Destination tensor info. Data types supported: U8/S16/S32/F16/F32. 
If an uninitialized ITensorInfo is passed in, it will be auto-initialized + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs); /** Check if the operator configuration is supported, irrespective of fusion * * @param[in] context Workload context within which the operator is running * @param[in] lhs Left hand side tensor info. * @param[in] rhs Right hand side tensor info. - * @param[in] dst Destination tensor info. If an uninitialized ITensorInfo is passed in, it will be auto-initialized + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst); + const ITensorInfo *rhs); /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch. - * Similar to @ref GpuAdd::create_op() + * + * Parameters are similar to @ref GpuAdd::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *rhs, - const ITensorInfo *lhs, - const ITensorInfo *dst); + const ITensorInfo *lhs); }; } // namespace dynamic_fusion } // namespace experimental diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h index 4b427be06a..1ba05ae5b8 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -65,32 +65,32 @@ public: * * @param[in,out] sketch Workload sketch into which the operator will be fused * @param[in] src Left hand side tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[out] dst Destination tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes Operator attributes + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Attributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion * - * @param[in] context Workload context within which the operator is running - * @param[in] src Left hand side tensor info. Data types supported: All. - * @param[out] dst Destination tensor info. Data types supported: All. - * If an uninitialized ITensorInfo is passed in, it will be auto-initialized - * @param[in] attributes Operator attributes + * @param[in] context Workload context within which the operator is running + * @param[in] src Left hand side tensor info. Data types supported: All. + * @param[in] attributes Operator attributes + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch. 
- * Similar to @ref GpuCast::create_op() + * + * Parameters are similar to @ref GpuCast::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); }; } // namespace dynamic_fusion diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h index 66d6c5f300..e96251196a 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,34 +58,34 @@ public: * * @param[in, out] sketch Workload sketch into which the operator will be fused * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[out] dst Destination tensor info. Data types supported: F16/F32. - * If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes Operator attributes + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Attributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion * * @param[in] context Workload context within which the operator is running * @param[in] src Source tensor info. Data types supported: F16/F32. - * @param[in] dst Destination tensor info. Data types supported: F16/F32. - * If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes Operator attributes + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); /** Validate the operator and check if it can be fused into the workload sketch. - * Similar to @ref GpuClamp::create_op() + * + * Parameters are similar to @ref GpuClamp::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); }; } // namespace dynamic_fusion diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h index 42c63df87f..612cc83a1f 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h @@ -62,7 +62,7 @@ public: * @param[in] bia (Optional) Bias tensor * @param[in] attributes Operator attributes * - * @return pointer for the destination tensor + * @return Pointer for the destination tensor info */ static ITensorInfo *create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, @@ -76,6 +76,8 @@ public: * @param[in] wei Weight tensor * @param[in] bia (Optional) Bias tensor * @param[in] attributes Operator attributes + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, @@ -83,7 +85,10 @@ public: const ITensorInfo *bia, const Attributes &attributes); /** Check if the operator configuration is supported and if it can be fused into the workload sketch. 
- * Similar to @ref GpuConv2d::create_op() + * + * Parameters are similar to @ref GpuConv2d::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h index a36ab62143..a0cb292730 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -59,34 +59,42 @@ public: * @param[in] src Source tensor * @param[in] wei Weight tensor * @param[in] bia (Optional) Bias tensor - * @param[out] dst Destination tensor. If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes Operator attributes + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *wei, - ITensorInfo *bia, - ITensorInfo *dst, - const Attributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + ITensorInfo *wei, + ITensorInfo *bia, + const Attributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion - * Similar to @ref GpuDepthwiseConv2d::create_op() + * + * @param[in] context Workload context within which the operator is running + * @param[in] src Source tensor + * @param[in] wei Weight tensor + * @param[in] bia (Optional) Bias tensor + * @param[in] attributes Operator attributes + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, - const ITensorInfo *dst, const Attributes &attributes); /** Check if the operator configuration is supported and if it can be fused into the workload sketch. - * Similar to @ref GpuDepthwiseConv2d::create_op() + * + * Parameters are similar to @ref GpuDepthwiseConv2d::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, - const ITensorInfo *dst, const Attributes &attributes); }; } // namespace dynamic_fusion diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h index 2511b0efd5..06317511cd 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,7 +33,6 @@ namespace experimental { namespace dynamic_fusion { - /** Forward declaration */ class GpuWorkloadContext; class GpuWorkloadSketch; @@ -66,6 +65,8 @@ public: * @param[in] context Workload context within which the operator is running. * @param[in] src Source tensor info. * @param[in] dst Destination tensor info. + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, @@ -73,7 +74,9 @@ public: /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch. * - * Similar to @ref GpuOutput::create_op(). + * Parameters are similar to @ref GpuOutput::create_op(). 
+ * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h index 2b49a31191..69c7a3a76a 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h @@ -55,30 +55,32 @@ public: * * @param[in,out] sketch Workload sketch into which the operator will be fused * @param[in] src Input tensor info. Data type supported: All - * @param[out] dst Output info. Data type supported: Same as @p src * @param[in] attributes Operator attributes + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Attributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion * * @param[in] context Workload context within which the operator is running * @param[in] src Input tensor info. - * @param[in] dst Output info. * @param[in] attributes Operator attributes + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch. - * Similar to @ref GpuReshape::create_op() + * + * Parameters are similar to @ref GpuReshape::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); }; diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h index 1387bf1cf0..f9661c1c24 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,32 +64,32 @@ public: * * @param[in,out] sketch Workload sketch into which the operator will be fused * @param[in] src Left hand side tensor info. - * @param[out] dst Destination tensor info. - * If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes Operator attributes + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Attributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion * - * @param[in] context Workload context within which the operator is running - * @param[in] src Left hand side tensor info. - * @param[out] dst Destination tensor info. - * If an uninitialized ITensorInfo is passed in, it will be auto-initialized - * @param[in] attributes Operator attributes + * @param[in] context Workload context within which the operator is running + * @param[in] src Left hand side tensor info. 
+ * @param[in] attributes Operator attributes + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch. - * Similar to @ref GpuResize::create_op() + * + * Parameters are similar to @ref GpuResize::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes); }; diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 736ce9bf5b..9a218b3e75 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -56,11 +56,6 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack &t //Check data type for different elementwise operators ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16, DataType::S32, DataType::S16, DataType::U8); - const bool rhs_in_place = (rhs == dst); - const bool lhs_in_place = (lhs == dst); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_in_place && lhs_in_place, "Both LHS and RHS cannot be in-place at same time for any elementwise operation."); - // dst shape is correct const TensorShape out_shape = TensorShape::broadcast_shape(lhs->tensor_shape(), rhs->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp index 46033d842b..a02160cba8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,6 @@ #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" -#include "src/common/utils/Log.h" - namespace arm_compute { namespace experimental @@ -36,37 +34,33 @@ namespace dynamic_fusion { Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst) + const ITensorInfo *rhs) { + // Set the elementwise operation to ADD then call the elementwise common validate_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst) + const ITensorInfo *rhs) { + // Set the elementwise operation to ADD then call the elementwise common is_supported_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } -void GpuAdd::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst) +ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuAdd::validate_op(sketch, lhs, rhs, dst)); - ARM_COMPUTE_LOG_PARAMS(lhs, rhs, dst); - + // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() // Set the elementwise operation to ADD then call the elementwise common create_op ElementwiseBinaryCommonAttributes common_attributes{}; common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); - GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, dst, common_attributes); + return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp index 3a5b64ad9c..33c2d43e07 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -38,18 +38,22 @@ namespace dynamic_fusion { namespace { -constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} -Status GpuCast::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const CastAttributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const CastAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON(src == dst); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, src->clone()->set_data_type(attributes.data_type())); // Check support level @@ -59,7 +63,7 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&dst_info_to_validate, + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, @@ -76,7 +80,7 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentCast::validate(properties, arguments, attributes, settings)); } } @@ -87,17 +91,27 @@ Status GpuCast::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +Status GpuCast::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const CastAttributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const CastAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, src->clone()->set_data_type(attributes.data_type())); // Perform fusion test @@ -110,18 +124,19 @@ Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuCast::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const CastAttributes &attributes) +ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const CastAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuCast::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuCast::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor info if empty auto_init_if_empty(*dst, src->clone()->set_data_type(attributes.data_type())); @@ -160,6 +175,8 @@ void GpuCast::create_op(GpuWorkloadSketch &sketch, const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp index ffef6115d6..89b533c9b8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -41,24 +41,30 @@ namespace dynamic_fusion { namespace { -constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} // namespace - -Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const ClampAttributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ClampAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value"); - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, *src->clone()); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + const ClComponentActivation::Attributes act_info + { + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() + }; // Check components if(context.gpu_language() == GpuLanguage::OpenCL) @@ -68,7 +74,7 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC, src); - arguments.add_const_tensor(ACL_DST, &dst_info_to_validate); + 
arguments.add_const_tensor(ACL_DST, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentActivation::validate(properties, arguments, act_info)); } else @@ -78,18 +84,29 @@ Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ClampAttributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const ClampAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); // Check if tensors have valid id, i.e. they are created from a sketch - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, *src->clone()); // Perform fusion test to check if the operator meets fusion constraints @@ -101,18 +118,19 @@ Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuClamp::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const ClampAttributes &attributes) +ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const ClampAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor auto_init_if_empty(*dst, *src->clone()); @@ -121,7 +139,10 @@ void GpuClamp::create_op(GpuWorkloadSketch &sketch, GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph(); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; + const ClComponentActivation::Attributes act_info + { + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() + }; const auto *const sketch_ctx = sketch.implementation().context(); @@ -151,6 +172,8 @@ void GpuClamp::create_op(GpuWorkloadSketch &sketch, const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp index 
7a8b97957e..690371f910 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp @@ -131,10 +131,8 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, { dst_info_to_validate_ptr = dst; } - else - { - calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); - } + + calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Check support level // Data type diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp index b08af61d8f..0f9e726604 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,20 +61,26 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, c } } -constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; -} // namespace - -Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *wei, - const ITensorInfo *bia, - const ITensorInfo *dst, - const DepthwiseConv2dAttributes &attributes) +/* A helper method to reduce the duplication in dst tensor initialization +* when calling validate() +*/ +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *wei, + const ITensorInfo *bia, + const ITensorInfo *dst, + const DepthwiseConv2dAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Check support level @@ -100,12 +106,12 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.fast_relaxed_math( (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) - && (dst_info_to_validate.data_type() == DataType::F32 || dst_info_to_validate.data_type() == DataType::F16)); + && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16)); settings.is_fma_available(get_arch_from_target(gpu_target) == GPUTarget::MIDGARD) .m0(dwc_info.m0) @@ -117,7 +123,7 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont arguments.add_const_tensor(ACL_SRC_0, src); arguments.add_const_tensor(ACL_SRC_1, wei); arguments.add_const_tensor(ACL_SRC_2, bia); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); 
ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings)); } } @@ -129,23 +135,36 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *wei, + const ITensorInfo *bia, + const DepthwiseConv2dAttributes &attributes) +{ + return is_supported_op_helper(context, src, wei, bia, nullptr, attributes); +} + Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, - const ITensorInfo *dst, const DepthwiseConv2dAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id()); if(bia != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id()); } + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; + // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes); // Perform fusion test @@ -161,20 +180,21 @@ Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes); } -void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *wei, - ITensorInfo *bia, - ITensorInfo *dst, - const DepthwiseConv2dAttributes &attributes) +ITensorInfo *GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + ITensorInfo *wei, + ITensorInfo *bia, + const DepthwiseConv2dAttributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst); - ARM_COMPUTE_LOG_PARAMS(src, wei, bia, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei); + ARM_COMPUTE_LOG_PARAMS(src, wei, bia, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); calculate_and_init_dst_if_empty(dst, src, wei, attributes); @@ -197,7 +217,7 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) @@ -241,6 +261,8 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch, 
const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp index c906da8199..107a5e5fa7 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp @@ -41,7 +41,7 @@ namespace dynamic_fusion namespace { constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; -} +} // namespace Status GpuOutput::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp index 7a4063d554..f5645f325f 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp @@ -37,15 +37,21 @@ namespace dynamic_fusion { namespace { -GpuOperatorType operator_type = GpuOperatorType::Complex; -} - -Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const Attributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ReshapeAttributes &attributes) { - TensorInfo dst_info_to_validate = *dst; + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } + auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape())); // Check components @@ -57,7 +63,7 @@ Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, // Validate GpuReshape Component ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentReshape::validate(arguments)); } @@ -68,16 +74,28 @@ Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, return Status{}; } + +GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const Attributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const Attributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape())); // Perform fusion test @@ -90,17 +108,20 @@ Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuReshape::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes) +ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Attributes &attributes) { - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes.shape()); - ARM_COMPUTE_ERROR_THROW_ON(GpuReshape::validate_op(sketch, src, dst, attributes)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes.shape()); + ARM_COMPUTE_ERROR_THROW_ON(GpuReshape::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); + auto_init_if_empty(*dst, src->clone()->set_tensor_shape(attributes.shape())); // Translate into components and add to component graph @@ -136,7 +157,9 @@ void GpuReshape::create_op(GpuWorkloadSketch &sketch, const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion } // namespace experimental -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp index aa45f4c1a5..5f52eea7d0 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,17 +54,21 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, c } } -constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; -} -Status GpuResize::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst, - const Attributes &attributes) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *src, + const ITensorInfo *dst, + const ResizeAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; + + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes); // Check support level @@ -88,7 +92,7 @@ Status GpuResize::is_supported_op(const GpuWorkloadContext &context, ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, src); - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentResize::validate(properties, arguments, attributes)); } } @@ -100,16 +104,27 @@ Status GpuResize::is_supported_op(const GpuWorkloadContext &context, return Status{}; } +constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; +} // namespace + +Status GpuResize::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *src, + const Attributes &attributes) +{ + return is_supported_op_helper(context, src, nullptr, attributes); +} + Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const ITensorInfo *dst, const GpuResize::Attributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); - // Auto initialize dst tensor info if empty - TensorInfo dst_info_to_validate = *dst; + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; + + // Auto initialize dst tensor info calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes); // Perform fusion test @@ -123,18 +138,19 @@ Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, "Operator fusion test failed. 
This operator cannot be fused into the workload"); // Check if configuration is supported - return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); + return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuResize::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const GpuResize::Attributes &attributes) +ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const GpuResize::Attributes &attributes) { - // Assert validation - ARM_COMPUTE_ERROR_THROW_ON(GpuResize::validate_op(sketch, src, dst, attributes)); - ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); - ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_LOG_PARAMS(src, attributes); + ARM_COMPUTE_ERROR_THROW_ON(GpuResize::validate_op(sketch, src, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor info if empty calculate_and_init_dst_if_empty(dst, src, attributes); @@ -172,6 +188,8 @@ void GpuResize::create_op(GpuWorkloadSketch &sketch, const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp index aec22e100c..7c087c9a7b 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" +#include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" @@ -43,30 +44,23 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *lhs, c auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(broadcast_pair.first)); } } -GpuOperatorType operator_type = GpuOperatorType::Simple; -} -ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) +Status is_supported_op_helper(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ITensorInfo *dst, + const ElementwiseBinaryCommonAttributes &attributes) { - _operation = operation; - return *this; -} + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); -ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const -{ - return _operation; -} + TensorInfo dst_info_to_validate; + const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; -Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *lhs, - const ITensorInfo *rhs, - const ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); + if(dst != nullptr) + { + dst_info_to_validate_ptr = dst; + } - // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs); // Check components @@ -80,16 +74,8 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, lhs); arguments.add_const_tensor(ACL_SRC_1, rhs); + arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); - // We needed to pass the original dst pointer for in-place detection, in case its shape is not empty - if(dst->tensor_shape().total_size() == 0) - { - arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate); - } - else - { - arguments.add_const_tensor(ACL_DST_0, dst); - } ARM_COMPUTE_RETURN_ON_ERROR(ClComponentElementwiseBinary::validate(arguments, attributes)); } } @@ -101,18 +87,40 @@ Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext return Status{}; } +GpuOperatorType operator_type = GpuOperatorType::Simple; +} // namespace + +ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) +{ + _operation = operation; + return *this; +} + +ElementwiseBinaryCommonAttributes::ElementwiseOp ElementwiseBinaryCommonAttributes::operation() const +{ + return _operation; +} + +Status GpuElementwiseBinaryCommon::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes) +{ + return is_supported_op_helper(context, lhs, rhs, nullptr, attributes); +} + Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst); - ARM_COMPUTE_RETURN_ERROR_ON( - !lhs->has_valid_id() || !rhs->has_valid_id() || !dst->has_valid_id()); + 
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON(!lhs->has_valid_id() || !rhs->has_valid_id()); + + // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object + TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - TensorInfo dst_info_to_validate = *dst; calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs); // Perform fusion test @@ -125,20 +133,21 @@ Status GpuElementwiseBinaryCommon::validate_op(const GpuWorkloadSketch ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op), "Operator fusion test failed. This operator cannot be fused into the workload"); - // Check if configuration is supported, and passing the original dst for in-place detection - return is_supported_op(*sketch.gpu_context(), lhs, rhs, dst, attributes); + // Check if configuration is supported + return is_supported_op_helper(*sketch.gpu_context(), lhs, rhs, &dst_info_to_validate, attributes); } -void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes) +ITensorInfo *GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes) { - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst); - const bool in_place = (lhs == dst) || (rhs == dst); - static TensorInfo in_place_dst; - in_place_dst = in_place ? sketch.create_tensor_info(*lhs) : TensorInfo{}; + ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_LOG_PARAMS(lhs, rhs); + ARM_COMPUTE_ERROR_THROW_ON(GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, attributes)); + + ITensorInfo *dst = sketch.implementation().create_virtual_tensor(); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); // Auto initialize dst tensor calculate_and_init_dst_if_empty(dst, lhs, rhs); @@ -160,14 +169,7 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch ArgumentPack arguments; arguments.add_const_tensor(ACL_SRC_0, lhs); arguments.add_const_tensor(ACL_SRC_1, rhs); - if(in_place) - { - arguments.add_const_tensor(ACL_DST_0, &in_place_dst); - } - else - { - arguments.add_const_tensor(ACL_DST_0, dst); - } + arguments.add_const_tensor(ACL_DST_0, dst); comp_graph.add_new_component(properties, arguments, attributes); } } @@ -183,16 +185,11 @@ void GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch ArgumentPack tensors; tensors.add_const_tensor(ACL_SRC_0, lhs); tensors.add_const_tensor(ACL_SRC_1, rhs); - if(in_place) - { - tensors.add_const_tensor(ACL_DST_0, &in_place_dst); - } - else - { - tensors.add_tensor(ACL_DST_0, dst); - } + tensors.add_tensor(ACL_DST_0, dst); const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors); sketch.implementation().operator_group().add_operator(op); + + return dst; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h index b00d069389..cbefa379e6 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h @@ -25,6 +25,7 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON #include "arm_compute/core/Error.h" +#include "arm_compute/core/ITensorInfo.h" namespace arm_compute { @@ -76,34 +77,36 @@ public: * 
@param[in,out] sketch Workload sketch into which the operator will be fused * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. * @param[in] rhs Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32. - * @param[out] dst Destination tensor info. Data types supported: U8/S16/S32/F16/F32. If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes ElementwiseBinaryCommonAttributes containing the operator type: ADD, SUB, DIV, ... etc. + * + * @return Pointer for the destination tensor info */ - static void create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs, - ITensorInfo *dst, - const ElementwiseBinaryCommonAttributes &attributes); + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs, + const ElementwiseBinaryCommonAttributes &attributes); /** Check if the operator configuration is supported, irrespective of fusion * * @param[in] context Workload context within which the operator is running * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. * @param[in] rhs Right hand side tensor info. Data types supported: U8/S16/S32/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: U8/S16/S32/F16/F32. If an uninitialized ITensorInfo is passed in, it will be auto-initialized * @param[in] attributes ElementwiseBinaryCommonAttributes containing the operator type: ADD, SUB, DIV, ... etc. + * + * @return Status */ static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes); /** Validate the operator and check if it can be fused into the workload sketch. 
- * Similar to @ref GpuElementwiseBinaryCommon::create_op() + * + * Parameters are similar to @ref GpuElementwiseBinaryCommon::create_op() + * + * @return Status */ static Status validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *rhs, const ITensorInfo *lhs, - const ITensorInfo *dst, const ElementwiseBinaryCommonAttributes &attributes); }; } // namespace dynamic_fusion diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index a70f512f9f..7f2d439183 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -158,20 +158,17 @@ TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - auto in_0_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - auto in_1_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - auto in_2_info = sketch.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_0_info = sketch.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_1_info = sketch.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_2_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - auto out_0_info = sketch.create_tensor_info(); - auto out_1_info = sketch.create_tensor_info(); + TensorInfo out_0_info = sketch.create_tensor_info(); + TensorInfo out_1_info = sketch.create_tensor_info(); - auto ans_0_info = sketch.create_tensor_info(); - auto ans_1_info = sketch.create_tensor_info(); - - GpuAdd::create_op(sketch, &in_0_info, &in_1_info, &ans_0_info); - GpuOutput::create_op(sketch, &ans_0_info, &out_0_info); - GpuAdd::create_op(sketch, &ans_0_info, &in_2_info, &ans_1_info); - GpuOutput::create_op(sketch, &ans_1_info, &out_1_info); + ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info); + GpuOutput::create_op(sketch, ans_0_info, &out_0_info); + ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, &in_2_info); + GpuOutput::create_op(sketch, ans_1_info, &out_1_info); // Configure runtime ClWorkloadRuntime runtime; @@ -257,17 +254,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - auto in_0_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - auto in_1_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - auto in_2_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - - auto out_0_info = sketch.create_tensor_info(); - auto out_1_info = sketch.create_tensor_info(); + TensorInfo in_0_info = sketch.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_1_info = sketch.create_tensor_info(t_input_shape, 1, data_type); + TensorInfo in_2_info = sketch.create_tensor_info(t_input_shape, 1, data_type); - auto ans_0_info = sketch.create_tensor_info(); - auto ans_1_info = sketch.create_tensor_info(); - auto ans_2_info = sketch.create_tensor_info(); - auto ans_3_info = sketch.create_tensor_info(); + TensorInfo out_0_info = sketch.create_tensor_info(); + TensorInfo out_1_info = sketch.create_tensor_info(); CastAttributes cast_0_attr; cast_0_attr.data_type(DataType::S32).convert_policy(ConvertPolicy::SATURATE); @@ -275,12 +267,12 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) CastAttributes cast_1_attr; cast_1_attr.data_type(DataType::F32).convert_policy(ConvertPolicy::SATURATE); - 
GpuAdd::create_op(sketch, &in_0_info, &in_1_info, &ans_0_info); - GpuOutput::create_op(sketch, &ans_0_info, &out_0_info); - GpuAdd::create_op(sketch, &ans_0_info, &in_2_info, &ans_1_info); - GpuCast::create_op(sketch, &ans_1_info, &ans_2_info, cast_0_attr); - GpuCast::create_op(sketch, &ans_2_info, &ans_3_info, cast_1_attr); - GpuOutput::create_op(sketch, &ans_3_info, &out_1_info); + ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info); + GpuOutput::create_op(sketch, ans_0_info, &out_0_info); + ITensorInfo *ans_1_info = GpuAdd::create_op(sketch, ans_0_info, &in_2_info); + ITensorInfo *ans_2_info = GpuCast::create_op(sketch, ans_1_info, cast_0_attr); + ITensorInfo *ans_3_info = GpuCast::create_op(sketch, ans_2_info, cast_1_attr); + GpuOutput::create_op(sketch, ans_3_info, &out_1_info); // Configure runtime ClWorkloadRuntime runtime; diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp index 1451ab3de8..0385407ad2 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp @@ -48,7 +48,7 @@ TEST_SUITE(ADD) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Add @@ -71,41 +71,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), - TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), - TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), - })), framework::dataset::make("Expected", { true, false, true, true, false, true, true, false, false, true})), - input1_info, input2_info, output_info, expected) -{ - // Create a new workload sketch - auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &gpu_ctx }; - - // Fuse Elementwise Add - auto lhs_info = sketch.create_tensor_info(input1_info); - auto rhs_info = sketch.create_tensor_info(input2_info); - auto dst_info = sketch.create_tensor_info(output_info); - bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &dst_info)); - ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); -} - -DATA_TEST_CASE(ValidateRhsInplace, framework::DatasetMode::ALL, zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - 
TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for rhs - })), - framework::dataset::make("Expected", { true, false})), input1_info, input2_info, expected) { // Create a new workload sketch @@ -116,29 +82,8 @@ DATA_TEST_CASE(ValidateRhsInplace, framework::DatasetMode::ALL, zip(zip( // Fuse Elementwise Add auto lhs_info = sketch.create_tensor_info(input1_info); auto rhs_info = sketch.create_tensor_info(input2_info); - bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &rhs_info)); - ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); -} - -DATA_TEST_CASE(ValidateLhsInplace, framework::DatasetMode::ALL, zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting not allowed for lhs - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs - })), - framework::dataset::make("Expected", { false, true})), - input1_info, input2_info, expected) -{ - // Create a new workload sketch - auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); - auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; - GpuWorkloadSketch sketch{ &gpu_ctx }; - // Fuse Elementwise Add - auto lhs_info = sketch.create_tensor_info(input1_info); - auto rhs_info = sketch.create_tensor_info(input2_info); - bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info, &lhs_info)); + bool res = bool(GpuAdd::validate_op(sketch, &lhs_info, &rhs_info)); ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); } // clang-format on diff --git a/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp index 947201ff97..177c02c2c7 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -51,33 +51,21 @@ TEST_SUITE(DYNAMIC_FUSION) TEST_SUITE(CLAMP) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Minimum value larger than maximum value }), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16), - TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32), - })), framework::dataset::make("MinVal", { 0.2f, 1.5f, - 0.1f, - 3.0f, 9.0f, })), framework::dataset::make("MaxVal", { 0.5f, 2.0f, 1.0f, - 4.0f, - 1.0f, })), - framework::dataset::make("Expected", { true, true, false, false, false })), - input_info, output_info, min_val, max_val, expected) + framework::dataset::make("Expected", { true, true, false })), + input_info, min_val, max_val, expected) { // Create a new workload sketch CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); @@ -86,13 +74,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // Fuse Clamp const TensorInfo src_info = sketch.create_tensor_info(input_info); - const TensorInfo dst_info = sketch.create_tensor_info(output_info); ClampAttributes attributes {}; attributes.min_val(min_val) .max_val(max_val); - const bool res = static_cast(GpuClamp::validate_op(sketch, &src_info, &dst_info, attributes)); + const bool res = static_cast(GpuClamp::validate_op(sketch, &src_info, attributes)); ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); } // clang-format on diff --git a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp index f08cc60ea2..b6331d70c8 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -53,31 +53,29 @@ constexpr float tolerance_num = 0.02f; /**< T // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type input/weights - TensorInfo(TensorShape(3U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching input feature maps - TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching depth multiplier - TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases size - TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases dimensions - TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid output size - TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // patch size bigger than input width - TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // dilation < 1 +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( // Explanations of failing tests + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching data type input/weights + TensorInfo(TensorShape(3U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching input feature maps + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Mismatching depth multiplier + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases size + TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Invalid biases dimensions + TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // dilation < 1 TensorInfo(TensorShape(8U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM16, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM16, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U8, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S8, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U16, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S16, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U32, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S32, DataLayout::NHWC), // Unsupported data type - TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), // 
Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::QASYMM16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S8, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S16, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::U32, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(8U, 32U, 13U), 1, DataType::S32, DataLayout::NHWC), // Unsupported data type + TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::F32, DataLayout::NCHW), // Unsupported data layout TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), // weight dimension > 3 + TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), // weight dimension > 3 TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(8U, 32U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC), @@ -87,8 +85,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(24U, 3U, 3U), 1, DataType::QASYMM8, DataLayout::NHWC), @@ -115,8 +111,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(2U, 2U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(2U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(16U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(24U), 1, DataType::S32, DataLayout::NHWC), @@ -138,34 +132,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), TensorInfo(TensorShape(24U), 1, DataType::F32, DataLayout::NHWC), })), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - 
TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(2U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(16U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(16U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(16U, 25U, 11U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QASYMM8, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QASYMM8_SIGNED, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QSYMM16, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QSYMM8, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QSYMM8_PER_CHANNEL, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::QASYMM16, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::U8, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::S8, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::U16, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::S16, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::U32, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U), 1, DataType::S32, DataLayout::NHWC), - TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::F32, DataLayout::NCHW), - TensorInfo(TensorShape(24U, 32U, 11U, 4U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 32U, 11U, 4U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 33U, 14U, 4U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 17U, 5U, 4U), 1, DataType::F32, DataLayout::NHWC), - TensorInfo(TensorShape(24U, 15U, 4U, 4U), 1, DataType::F32, DataLayout::NHWC), - })), framework::dataset::make("Padding", { Padding2D(0, 0, 0, 0), Padding2D(0, 0, 0, 0), Padding2D(0, 0, 0, 0), @@ -173,8 +139,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi Padding2D(0, 0, 0, 0), Padding2D(0, 0, 0, 0), Padding2D(0, 0, 0, 0), - Padding2D(0, 0, 0, 0), - Padding2D(0, 0, 0, 0), Padding2D(1, 1, 0, 0), Padding2D(1, 1, 0, 0), Padding2D(1, 1, 0, 0), @@ -217,8 +181,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi Size2D(1, 1), Size2D(1, 1), Size2D(1, 1), - Size2D(1, 1), - Size2D(1, 1), Size2D(2, 3), Size2D(2, 3), })), @@ -227,8 +189,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi 3, 1, 1, - 1, - 2, 2, 2, 3, @@ -255,8 +215,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi Size2D(1U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), - Size2D(1U, 1U), - Size2D(20U, 1U), Size2D(0U, 1U), Size2D(1U, 1U), Size2D(1U, 1U), @@ -278,10 +236,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi Size2D(1U, 1U), Size2D(2U, 3U), })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, true, false, + framework::dataset::make("Expected", { false, false, false, false, false, false, true, false, false, false, false, false, false, false, false, false, false, false, false, false, true, false, true, true, true })), - input_info, weights_info, biases_info, output_info, padding, stride, depth_multiplier, dilation, expected) + input_info, weights_info, biases_info, padding, stride, depth_multiplier, 
dilation, expected) { CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; @@ -290,7 +248,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); const TensorInfo sketch_weights_info = sketch.create_tensor_info(weights_info); const TensorInfo sketch_biases_info = sketch.create_tensor_info(biases_info); - const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); DepthwiseConv2dAttributes attributes {}; attributes.pad(padding) @@ -298,7 +255,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi .dilation(dilation) .depth_multiplier(depth_multiplier); - const Status status = GpuDepthwiseConv2d::validate_op(sketch, &sketch_input_info, &sketch_weights_info, &sketch_biases_info, &sketch_output_info, attributes); + const Status status = GpuDepthwiseConv2d::validate_op(sketch, &sketch_input_info, &sketch_weights_info, &sketch_biases_info, attributes); const bool res = bool(status); ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); } diff --git a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp index b7a71db88a..51822b045a 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp @@ -44,17 +44,15 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32), TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32), // mismatching dimensions - TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F16), // mismatching types }), -framework::dataset::make("OutputInfo", +framework::dataset::make("OutputShape", { - TensorInfo(TensorShape(9U, 5U, 21U), 1, DataType::F32), - TensorInfo(TensorShape(8U, 24U, 4U), 1, DataType::F32), - TensorInfo(TensorShape(192U, 192U), 1, DataType::F32), - TensorInfo(TensorShape(9U, 5U, 21U), 1, DataType::F32), + TensorShape(9U, 5U, 21U), + TensorShape(8U, 24U, 4U), + TensorShape(192U, 192U), })), -framework::dataset::make("Expected", { true, true, false, false })), -input_info, output_info, expected) +framework::dataset::make("Expected", { true, true, false })), +input_info, output_shape, expected) { // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); @@ -62,13 +60,12 @@ input_info, output_info, expected) GpuWorkloadSketch sketch{ &gpu_ctx }; // Create sketch tensors - auto input_shape = input_info.tensor_shape(); - auto output_shape = output_info.tensor_shape(); - auto src_info = sketch.create_tensor_info(input_info); - auto dst_info = sketch.create_tensor_info(output_info); + TensorShape input_shape = input_info.tensor_shape(); + TensorInfo src_info = sketch.create_tensor_info(input_info); + ReshapeAttributes attributes; attributes.shape(output_shape); - Status status = GpuReshape::validate_op(sketch, &src_info, &dst_info, attributes); + Status status = GpuReshape::validate_op(sketch, &src_info, attributes); ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS); } diff --git a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp index 3791aef44c..696be54c92 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp +++ 
b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2022 Arm Limited. +* Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -86,8 +86,8 @@ TEST_SUITE(Validate) const auto default_input_shape = TensorShape{ 2, 3, 3, 2 }; const auto default_output_shape = TensorShape{ 4, 6, 3, 2 }; -constexpr auto default_data_type = DataType::U8; -constexpr auto default_data_layout = DataLayout::NHWC; +constexpr auto default_data_type = DataType::U8; +constexpr auto default_data_layout = DataLayout::NHWC; TEST_CASE(NullPtr, framework::DatasetMode::ALL) { @@ -98,15 +98,10 @@ TEST_CASE(NullPtr, framework::DatasetMode::ALL) GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); - const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); // nullptr is given as input - Status status = GpuResize::validate_op(sketch, nullptr, &sketch_output_info, ResizeAttributes()); - ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); - - // nullptr is given as output - status = GpuResize::validate_op(sketch, &sketch_input_info, nullptr, ResizeAttributes()); + Status status = GpuResize::validate_op(sketch, nullptr, ResizeAttributes()); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -137,18 +132,19 @@ TEST_CASE(SupportDataType, framework::DatasetMode::ALL) for(auto &kv : supported_data_types) { - const TensorInfo input_info = TensorInfo{ default_input_shape, 1, kv.first, default_data_layout }; - const TensorInfo output_info = TensorInfo{ default_output_shape, 1, kv.first, default_data_layout }; + const TensorInfo input_info = TensorInfo{ default_input_shape, 1, kv.first, default_data_layout }; CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); - const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); + + ResizeAttributes attributes; + attributes.output_width(default_output_shape[0]); // shape is not important unless it's empty + attributes.output_height(default_output_shape[1]); - // nullptr is given as input - Status status = GpuResize::validate_op(sketch, &sketch_input_info, &sketch_output_info, ResizeAttributes()); + Status status = GpuResize::validate_op(sketch, &sketch_input_info, attributes); ARM_COMPUTE_EXPECT(bool(status) == kv.second, framework::LogLevel::ERRORS); } } @@ -164,10 +160,9 @@ TEST_CASE(MismatchingDataType, framework::DatasetMode::ALL) GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); - const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); - Status status = GpuResize::validate_op(sketch, &sketch_input_info, &sketch_output_info, ResizeAttributes()); + Status status = GpuResize::validate_op(sketch, &sketch_input_info, ResizeAttributes()); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -185,15 +180,14 @@ 
TEST_CASE(AlignedCornerNotSupported, framework::DatasetMode::ALL) GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); - const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); ResizeAttributes attributes{}; attributes.interpolation_policy(interpolation_policy) .sampling_policy(sampling_policy) .align_corners(align_corners); - Status status = GpuResize::validate_op(sketch, &sketch_input_info, &sketch_output_info, attributes); + Status status = GpuResize::validate_op(sketch, &sketch_input_info, attributes); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -207,13 +201,12 @@ TEST_CASE(UnsupportedInterpolationPolicy, framework::DatasetMode::ALL) GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); - const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); ResizeAttributes attributes{}; attributes.interpolation_policy(interpolation_policy); - Status status = GpuResize::validate_op(sketch, &sketch_input_info, &sketch_output_info, attributes); + Status status = GpuResize::validate_op(sketch, &sketch_input_info, attributes); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } @@ -227,13 +220,12 @@ TEST_CASE(UnsupportedLayout, framework::DatasetMode::ALL) GpuWorkloadContext gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); - const TensorInfo sketch_output_info = sketch.create_tensor_info(output_info); + const TensorInfo sketch_input_info = sketch.create_tensor_info(input_info); ResizeAttributes attributes{}; attributes.interpolation_policy(interpolation_policy); - Status status = GpuResize::validate_op(sketch, &sketch_input_info, &sketch_output_info, attributes); + Status status = GpuResize::validate_op(sketch, &sketch_input_info, attributes); ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS); } diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h index 630b664b78..235c8602b1 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -130,15 +130,13 @@ protected: GpuWorkloadSketch sketch{ &gpu_ctx }; // Create sketch tensors - auto input_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); - auto weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); - auto bias_info = sketch.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); - auto dst_info = sketch.create_tensor_info(); + TensorInfo input_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); + TensorInfo weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); + TensorInfo bias_info = sketch.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); + TensorInfo dst_info = sketch.create_tensor_info(); - auto ans_info = sketch.create_tensor_info(); - - FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, &ans_info, dwc_conv2d_attr); - GpuOutput::create_op(sketch, &ans_info, &dst_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, dwc_conv2d_attr); + GpuOutput::create_op(sketch, ans_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h index f97b541ce3..e2722a1bdc 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -105,28 +105,23 @@ protected: GpuWorkloadSketch sketch{ &gpu_ctx }; // Fuse first element wise binary Op - auto lhs_info = sketch.create_tensor_info(shape0, 1, _data_type); - auto rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type)); - - auto ans_info = sketch.create_tensor_info(); - auto dst_info = sketch.create_tensor_info(); + TensorInfo lhs_info = sketch.create_tensor_info(shape0, 1, _data_type); + TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + TensorInfo dst_info = sketch.create_tensor_info(); TensorInfo rhs_info_fuse; - TensorInfo ans2_info; - FunctionType::create_op(sketch, &lhs_info, &rhs_info, &ans_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, &lhs_info, &rhs_info); if(_fuse) { - rhs_info_fuse = sketch.create_tensor_info(shape2, 1, _data_type); - ans2_info = sketch.create_tensor_info(); - - FunctionType::create_op(sketch, &ans_info, &rhs_info_fuse, &ans2_info); - GpuOutput::create_op(sketch, &ans2_info, &dst_info); + rhs_info_fuse = sketch.create_tensor_info(shape2, 1, _data_type); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, &dst_info); } else { - GpuOutput::create_op(sketch, &ans_info, &dst_info); + GpuOutput::create_op(sketch, ans_info, &dst_info); } // Configure runtime diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h index 418cf4fe04..bd999027b3 100644 --- a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. 
+ * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -122,10 +122,8 @@ protected: CastAttributes attributes; attributes.convert_policy(policy).data_type(dt_out); - auto ans_info = sketch.create_tensor_info(); - - FunctionType::create_op(sketch, &src_info, &ans_info, attributes); - GpuOutput::create_op(sketch, &ans_info, &dst_info); + ITensorInfo *ans_info = FunctionType::create_op(sketch, &src_info, attributes); + GpuOutput::create_op(sketch, ans_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h index fe87d9a022..a1fd22582f 100644 --- a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -94,8 +94,8 @@ protected: template void fill(U &&tensor) { - float min_bound = 0; - float max_bound = 0; + float min_bound = 0; + float max_bound = 0; std::tie(min_bound, max_bound) = get_activation_layer_test_bounds(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, _data_type); library->fill_static_values(tensor, get_boundary_values(static_cast(min_bound), static_cast(max_bound))); } @@ -111,21 +111,15 @@ protected: TensorInfo src_info = sketch.create_tensor_info(TensorInfo(shape, 1, _data_type)); TensorInfo dst_info = sketch.create_tensor_info(TensorInfo(shape, 1, _data_type)); - auto ans_0_info = sketch.create_tensor_info(); - TensorInfo ans_1_info; - - FunctionType::create_op(sketch, &src_info, &ans_0_info, attributes); - + ITensorInfo *ans_0_info = FunctionType::create_op(sketch, &src_info, attributes); if(_fuse) { - ans_1_info = sketch.create_tensor_info(); - - FunctionType::create_op(sketch, &ans_0_info, &ans_1_info, attributes); - GpuOutput::create_op(sketch, &ans_1_info, &dst_info); + ITensorInfo *ans_1_info = FunctionType::create_op(sketch, ans_0_info, attributes); + GpuOutput::create_op(sketch, ans_1_info, &dst_info); } else { - GpuOutput::create_op(sketch, &ans_0_info, &dst_info); + GpuOutput::create_op(sketch, ans_0_info, &dst_info); } // Configure runtime diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h index a427d814cb..0d3b1f0296 100644 --- a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h @@ -75,13 +75,13 @@ protected: GpuWorkloadSketch sketch{ &gpu_ctx }; // Create sketch tensors - auto src_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, data_type)); - auto ans_info = sketch.create_tensor_info(TensorInfo(output_shape, 1, data_type)); - auto dst_info = sketch.create_tensor_info(TensorInfo(output_shape, 1, data_type)); + TensorInfo src_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, data_type)); + TensorInfo dst_info = sketch.create_tensor_info(TensorInfo(output_shape, 1, data_type)); ReshapeAttributes attributes; attributes.shape(output_shape); - FunctionType::create_op(sketch, &src_info, &ans_info, attributes); - GpuOutput::create_op(sketch, &ans_info, &dst_info); + + ITensorInfo *ans_info = FunctionType::create_op(sketch, &src_info, attributes); + GpuOutput::create_op(sketch, ans_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; diff --git 
a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h index 5cdf52e62b..7eb820e0eb 100644 --- a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2022 Arm Limited. +* Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -148,10 +148,8 @@ protected: ResizeAttributes attributes; attributes.align_corners(_align_corners).sampling_policy(_sampling_policy).interpolation_policy(_interpolation_policy).output_width(_output_width).output_height(_output_height); - TensorInfo scale_result_info = sketch.create_tensor_info(); - - FunctionType::create_op(sketch, &src_info, &scale_result_info, attributes); - GpuOutput::create_op(sketch, &scale_result_info, &dst_info); + ITensorInfo *scale_result_info = FunctionType::create_op(sketch, &src_info, attributes); + GpuOutput::create_op(sketch, scale_result_info, &dst_info); // Configure runtime ClWorkloadRuntime runtime; -- cgit v1.2.1
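
For readers scanning the diff, the operator-chaining pattern introduced by this change can be summarised with a short sketch. It is adapted from the integration tests touched above (tests/validation/dynamic_fusion/gpu/Integration.cpp); the tensor shape and data type are illustrative placeholders rather than values taken from the patch, and the snippet is a fragment of a test-style function, not a complete program.

    // Each operator now creates and returns its own (virtual) destination tensor info,
    // so intermediate TensorInfo objects no longer need to be declared up front.
    CLCompileContext  cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
    GpuWorkloadContext gpu_ctx       = GpuWorkloadContext{ &cl_compile_ctx };
    GpuWorkloadSketch  sketch{ &gpu_ctx };

    const TensorShape t_shape{ 16U, 16U }; // placeholder shape, not mandated by the patch
    TensorInfo in_0_info = sketch.create_tensor_info(t_shape, 1, DataType::F32);
    TensorInfo in_1_info = sketch.create_tensor_info(t_shape, 1, DataType::F32);
    TensorInfo out_info  = sketch.create_tensor_info();

    // create_op returns a pointer to the auto-initialized intermediate destination,
    // which is passed directly to the next operator in the fusion chain.
    ITensorInfo *ans_info = GpuAdd::create_op(sketch, &in_0_info, &in_1_info);
    GpuOutput::create_op(sketch, ans_info, &out_info);

The same pattern applies to the other migrated operators (Cast, Clamp, Reshape, Resize, DepthwiseConv2d): the explicit dst argument disappears from create_op, is_supported_op and validate_op, and the returned ITensorInfo* is fed to the next fused operator or to GpuOutput.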