aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2023-01-19 15:56:00 +0000
committerGunes Bayir <gunes.bayir@arm.com>2023-01-24 09:40:01 +0000
commitcc2877368d5e15d9ea89d31c84ec651fc0fffd13 (patch)
treec57a3a406125b3a31e2d4aff6126ce99f4ade395 /src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
parenta6a153817302793732e28b07c3b4046df3f91a60 (diff)
downloadComputeLibrary-cc2877368d5e15d9ea89d31c84ec651fc0fffd13.tar.gz
Change dynamic fusion API to return destination tensor info
The new dynamic fusion API is introduced in the following patch: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8906 For each operator (except Conv2D, which is migrated in the above patch), we - remove destination tensor from is_supported, validate and create calls - make create_op return ITensorInfo* to the intermediate destination object Affected operators: - DepthwiseConv2D - Cast - Elementwise Ops - Clamp - Reshape - Resize Resolves: COMPMID-5777 Change-Id: Ib60ec8a5f081752808455d7a7d790f2ed0627059 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8991 Reviewed-by: Ramy Elgammal <ramy.elgammal@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Dynamic-Fusion: Ramy Elgammal <ramy.elgammal@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp')
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp86
1 files changed, 54 insertions, 32 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
index b08af61d8f..0f9e726604 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,20 +61,26 @@ void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, c
}
}
-constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
-} // namespace
-
-Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ITensorInfo *wei,
- const ITensorInfo *bia,
- const ITensorInfo *dst,
- const DepthwiseConv2dAttributes &attributes)
+/* A helper method to reduce the duplication in dst tensor initialization
+* when calling validate()
+*/
+Status is_supported_op_helper(const GpuWorkloadContext &context,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const ITensorInfo *bia,
+ const ITensorInfo *dst,
+ const DepthwiseConv2dAttributes &attributes)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei);
+
+ TensorInfo dst_info_to_validate;
+ const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
+
+ if(dst != nullptr)
+ {
+ dst_info_to_validate_ptr = dst;
+ }
- // Auto initialize dst tensor info
- TensorInfo dst_info_to_validate = *dst;
calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes);
// Check support level
@@ -100,12 +106,12 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont
attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
// Get the depthwise convolution compute parameters
- auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.fast_relaxed_math(
(gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (dst_info_to_validate.data_type() == DataType::F32 || dst_info_to_validate.data_type() == DataType::F16));
+ && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16));
settings.is_fma_available(get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
.m0(dwc_info.m0)
@@ -117,7 +123,7 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont
arguments.add_const_tensor(ACL_SRC_0, src);
arguments.add_const_tensor(ACL_SRC_1, wei);
arguments.add_const_tensor(ACL_SRC_2, bia);
- arguments.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
+ arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr);
ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings));
}
}
@@ -129,23 +135,36 @@ Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &cont
return Status{};
}
+constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
+} // namespace
+
+Status GpuDepthwiseConv2d::is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const ITensorInfo *bia,
+ const DepthwiseConv2dAttributes &attributes)
+{
+ return is_supported_op_helper(context, src, wei, bia, nullptr, attributes);
+}
+
Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch,
const ITensorInfo *src,
const ITensorInfo *wei,
const ITensorInfo *bia,
- const ITensorInfo *dst,
const DepthwiseConv2dAttributes &attributes)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);
- ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id() || !dst->has_valid_id());
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei);
+ ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id());
if(bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
}
+ // Refer to GpuConv2d::validate_op() for id-validness of this TensorInfo object
+ TensorInfo dst_info_to_validate;
+
// Auto initialize dst tensor info
- TensorInfo dst_info_to_validate = *dst;
calculate_and_init_dst_if_empty(&dst_info_to_validate, src, wei, attributes);
// Perform fusion test
@@ -161,20 +180,21 @@ Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch,
"Operator fusion test failed. This operator cannot be fused into the workload");
// Check if configuration is supported
- return is_supported_op(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes);
+ return is_supported_op_helper(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes);
}
-void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *wei,
- ITensorInfo *bia,
- ITensorInfo *dst,
- const DepthwiseConv2dAttributes &attributes)
+ITensorInfo *GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ ITensorInfo *wei,
+ ITensorInfo *bia,
+ const DepthwiseConv2dAttributes &attributes)
{
- // Assert validation
- ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, dst, attributes));
- ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei, dst);
- ARM_COMPUTE_LOG_PARAMS(src, wei, bia, dst, attributes);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, wei);
+ ARM_COMPUTE_LOG_PARAMS(src, wei, bia, attributes);
+ ARM_COMPUTE_ERROR_THROW_ON(GpuDepthwiseConv2d::validate_op(sketch, src, wei, bia, attributes));
+
+ ITensorInfo *dst = sketch.implementation().create_virtual_tensor();
+ ARM_COMPUTE_ERROR_ON_NULLPTR(dst);
calculate_and_init_dst_if_empty(dst, src, wei, attributes);
@@ -197,7 +217,7 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch,
attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
// Get the depthwise convolution compute parameters
- auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD)
@@ -241,6 +261,8 @@ void GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sketch,
const Operator op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
sketch.implementation().operator_group().add_operator(op);
+
+ return dst;
}
} // namespace dynamic_fusion