From 2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Wed, 17 Jan 2024 16:07:03 +0000
Subject: Use the stable CKW API in the GPU dynamic fusion backend

- Refactor all kernels to work with the CKW stable API
- Add support for sub-tile in the op_load/op_store CKW operator
- Fix mismatch in resize
- Add comments in all kernels written with CKW to help developers
  understand the structure of the code
- Add texture image support in depthwise convolution written with CKW
- Add support for different block sizes in depthwise convolution
- Remove the use of the dynamic fusion helper functions
- Add support for floor in the op_unary() of CKW

Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530

Signed-off-by: Gian Marco Iodice
Signed-off-by: Gunes Bayir
Signed-off-by: Viet-Hoa Do
Signed-off-by: Jakub Sujak
Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914
Tested-by: Arm Jenkins
Reviewed-by: Jakub Sujak
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp    |  5 ++---
 src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp   | 10 +++-------
 src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp | 18 +++++-------------
 src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp |  6 ++----
 src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp    |  8 +++-----
 5 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
index 2cec67dc65..201c9f243c 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,8 +49,7 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *l
 Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
-                                                         DataType::S16, DataType::S32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
 
     // Set the elementwise operation to Add then call the elementwise common is_supported_op
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
index 6f35e66ea8..d25a2a3153 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,12 +57,8 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
 
     // Check support level
     // Data Type
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
-        src, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL,
-        DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::U8, DataType::S8,
-                                                         DataType::QASYMM8, DataType::S16, DataType::U16, DataType::U32,
-                                                         DataType::S32, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::F16, DataType::F32);
 
     if (context.gpu_language() == GpuLanguage::OpenCL)
     {
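A minimal sketch (not part of the patch) of how the narrowed data-type
validation behaves from the caller's side. The shapes, the helper name
check_fp_only_validation and the pre-built GpuWorkloadContext are illustrative
assumptions; GpuSub below behaves the same way:

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"

    using namespace arm_compute;
    using namespace arm_compute::experimental::dynamic_fusion;

    // Illustrative only: returns true when validation accepts F32 and rejects
    // S16, i.e. the behaviour introduced by this patch.
    bool check_fp_only_validation(const GpuWorkloadContext &context)
    {
        const TensorInfo lhs_f32(TensorShape(32U, 32U), 1, DataType::F32);
        const TensorInfo rhs_f32(TensorShape(32U, 32U), 1, DataType::F32);
        const TensorInfo lhs_s16(TensorShape(32U, 32U), 1, DataType::S16);

        // F32 passes the ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN check.
        const Status ok  = GpuAdd::is_supported_op(context, &lhs_f32, &rhs_f32);
        // S16 is now rejected (it was accepted before this change).
        const Status err = GpuAdd::is_supported_op(context, &lhs_s16, &rhs_f32);

        return ok.error_code() == ErrorCode::OK && err.error_code() != ErrorCode::OK;
    }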
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
index 55c604aacc..2d04f75610 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@
 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
 
 #include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Error.h"
 #include "arm_compute/core/experimental/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/core/Validate.h"
@@ -52,10 +53,12 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst,
                                      const Pool2dAttributes  &attributes,
                                      const GpuPool2dSettings &settings)
 {
+    ARM_COMPUTE_UNUSED(settings);
+
     if (dst->total_size() == 0U)
     {
         auto shape = misc::shape_calculator::compute_pool_shape(
-            *src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()));
+            *src, convert_pool_attr_to_pool_info(attributes, /* mixed_precision */ true));
         auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
     }
 }
@@ -63,17 +66,6 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst,
 constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
 } // namespace
 
-GpuPool2dSettings &GpuPool2dSettings::mixed_precision(bool mixed_precision)
-{
-    _mixed_precision = mixed_precision;
-    return *this;
-}
-
-bool GpuPool2dSettings::mixed_precision() const
-{
-    return _mixed_precision;
-}
-
 GpuPool2dSettings GpuPool2dSettings::use_inf_as_limit(bool use_inf_as_limit)
 {
     _use_inf_as_limit = use_inf_as_limit;
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
index fb09875b33..8e794c88b2 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,7 +60,6 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
                               const ResizeAttributes &attributes)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
-
     TensorInfo         dst_info_to_validate;
     const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
 
@@ -73,8 +72,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
 
     // Check support level
     // Data type
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
-                                                         DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
     // Data layout
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
     // Interpolation policy
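With the mixed_precision() setter and getter removed, GpuPool2dSettings only
carries the use_inf_as_limit option, and destination-shape inference always
uses mixed-precision pooling info. A short sketch (not part of the patch; the
helper name is illustrative) of what configuring the settings now looks like:

    #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"

    using arm_compute::experimental::dynamic_fusion::GpuPool2dSettings;

    // Illustrative only: build pooling settings after this patch.
    GpuPool2dSettings make_pool2d_settings()
    {
        // use_inf_as_limit() is the only remaining builder option;
        // a call such as .mixed_precision(true) no longer compiles.
        return GpuPool2dSettings().use_inf_as_limit(true);
    }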
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
index e5d62c9930..c53453a15c 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,8 +36,7 @@ namespace dynamic_fusion
 Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
-                                                         DataType::S16, DataType::S32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
 
     // Set the elementwise operation to Sub then call the elementwise common validate_op
@@ -49,8 +48,7 @@ Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *l
 Status GpuSub::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
-                                                         DataType::S16, DataType::S32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
 
     // Set the elementwise operation to Sub then call the elementwise common is_supported_op
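For reference, an end-to-end sketch (not part of the patch) of driving the
updated GpuSub::validate_op() through a workload sketch. The context/sketch
setup and the create_tensor_info() calls mirror the library's dynamic fusion
test suite and are assumptions here, as are the shapes and the function name:

    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h"

    using namespace arm_compute;
    using namespace arm_compute::experimental::dynamic_fusion;

    // Illustrative only: validate an F16 elementwise subtraction.
    Status validate_f16_sub()
    {
        // OpenCL compile context and dynamic fusion workload objects,
        // set up the way the dynamic fusion tests do it.
        auto               cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
        GpuWorkloadContext context{&cl_compile_ctx};
        GpuWorkloadSketch  sketch{&context};

        // Both operands must be floating point and share the same data type.
        ITensorInfo *lhs = context.create_tensor_info(TensorInfo(TensorShape(16U, 16U), 1, DataType::F16));
        ITensorInfo *rhs = context.create_tensor_info(TensorInfo(TensorShape(16U, 16U), 1, DataType::F16));

        return GpuSub::validate_op(sketch, lhs, rhs);
    }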