Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/operators')
15 files changed, 268 insertions, 286 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp index e7ee1c10df..2cec67dc65 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" + #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" @@ -32,12 +33,11 @@ namespace experimental { namespace dynamic_fusion { -Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *lhs, - const ITensorInfo *rhs) +Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, + DataType::S16, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); // Set the elementwise operation to Add then call the elementwise common validate_op @@ -46,12 +46,11 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } -Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *lhs, - const ITensorInfo *rhs) +Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, + DataType::S16, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); // Set the elementwise operation to Add then call the elementwise common is_supported_op @@ -60,9 +59,7 @@ Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } -ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs) +ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs) { // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() // Set the elementwise operation to Add then call the elementwise common create_op diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp index 33c2d43e07..6f35e66ea8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp @@ -23,12 +23,11 @@ */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h" +#include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h" - -#include "src/common/utils/Log.h" +#include 
"src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" namespace arm_compute { @@ -49,7 +48,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } @@ -58,25 +57,22 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, // Check support level // Data Type - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, - 1, - DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S16, - DataType::U16, DataType::U32, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, - 1, - DataType::U8, DataType::S8, DataType::QASYMM8, DataType::S16, - DataType::U16, DataType::U32, DataType::S32, DataType::F16, - DataType::F32); - - if(context.gpu_language() == GpuLanguage::OpenCL) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( + src, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, + DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::U8, DataType::S8, + DataType::QASYMM8, DataType::S16, DataType::U16, DataType::U32, + DataType::S32, DataType::F16, DataType::F32); + + if (context.gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); // Validate Cast Component { - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); - auto settings = ClComponentCast::Settings(); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); + auto settings = ClComponentCast::Settings(); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); @@ -94,16 +90,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; } // namespace -Status GpuCast::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const CastAttributes &attributes) +Status +GpuCast::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const CastAttributes &attributes) { return is_supported_op_helper(context, src, nullptr, attributes); } -Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *src, - const CastAttributes &attributes) +Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const CastAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); @@ -127,9 +120,7 @@ Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - const CastAttributes &attributes) +ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const CastAttributes &attributes) { ARM_COMPUTE_ERROR_ON_NULLPTR(src); ARM_COMPUTE_LOG_PARAMS(src, attributes); @@ -145,14 +136,15 @@ ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, GpuKernelComponentGraph &comp_graph = 
sketch.implementation().component_graph(); const auto *sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { ARM_COMPUTE_ERROR_ON(sketch_ctx->cl_compile_context() == nullptr); // Add Depthwise Conv2d Component { - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); - auto settings = ClComponentCast::Settings(); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); + auto settings = ClComponentCast::Settings(); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp index 89b533c9b8..697b7d4e1f 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp @@ -25,14 +25,13 @@ #include "arm_compute/core/experimental/Types.h" +#include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" -#include "src/common/utils/Log.h" - namespace arm_compute { namespace experimental @@ -48,12 +47,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), + "Maximum clamp value cannot be lower than minimum value"); TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } @@ -61,16 +61,15 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, auto_init_if_empty(dst_info_to_validate, *src->clone()); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info - { - ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() - }; + const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + attributes.max_val(), attributes.min_val()}; // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { // Validate Activation Component - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC, src); @@ -87,16 +86,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; } // namespace -Status GpuClamp::is_supported_op(const GpuWorkloadContext &context, - const 
ITensorInfo *src, - const ClampAttributes &attributes) +Status +GpuClamp::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ClampAttributes &attributes) { return is_supported_op_helper(context, src, nullptr, attributes); } -Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *src, - const ClampAttributes &attributes) +Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ClampAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); @@ -121,9 +117,7 @@ Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - const ClampAttributes &attributes) +ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const ClampAttributes &attributes) { ARM_COMPUTE_ERROR_ON_NULLPTR(src); ARM_COMPUTE_LOG_PARAMS(src, attributes); @@ -139,18 +133,16 @@ ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph(); // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped - const ClComponentActivation::Attributes act_info - { - ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() - }; + const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + attributes.max_val(), attributes.min_val()}; const auto *const sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { // Add Activation Component auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC, src); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp index cb270ed4b0..aaeec543f8 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp @@ -24,15 +24,15 @@ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h" #include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h" #include "src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h" #include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h" @@ -45,24 +45,30 @@ namespace dynamic_fusion { namespace { -DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info) +DirectConvComputeKernelInfo +config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo 
*weights, const PadStrideInfo &conv_info) { // Get GPU target GPUTarget gpu_target = CLScheduler::get().target(); - std::unique_ptr<arm_compute::cl_direct_conv::IClDirectConvKernelConfig> t = arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(gpu_target); + std::unique_ptr<arm_compute::cl_direct_conv::IClDirectConvKernelConfig> t = + arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(gpu_target); return t->configure(src, weights, conv_info); } -void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const Conv2dAttributes &attributes) +void calculate_and_init_dst_if_empty(ITensorInfo *dst, + const ITensorInfo *src, + const ITensorInfo *wei, + const Conv2dAttributes &attributes) { - if(dst->total_size() == 0U) + if (dst->total_size() == 0U) { - const auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), src->data_layout(), wei->tensor_shape(), - PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, - attributes.pad().right, - attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType + const auto shape = misc::shape_calculator::compute_deep_convolution_shape( + src->tensor_shape(), src->data_layout(), wei->tensor_shape(), + PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, + attributes.pad().right, attributes.pad().top, attributes.pad().bottom, + DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape)); } @@ -83,7 +89,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } @@ -98,18 +104,20 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, // Check components const auto gpu_target = context.gpu_target(); - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); // Validate Direct Conv2d Component { - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); - auto settings = ClComponentDirectConv2d::Settings(); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); + auto settings = ClComponentDirectConv2d::Settings(); settings.fast_relaxed_math( - (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) - && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16)); + (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) && + (dst_info_to_validate_ptr->data_type() == DataType::F32 || + dst_info_to_validate_ptr->data_type() == DataType::F16)); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); @@ -142,14 +150,14 @@ Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *wei, const ITensorInfo *bia, - const Conv2dAttributes &attributes) + const Conv2dAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei); 
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!wei->are_values_constant(), "Dynamic weights are not supported"); // Check if tensors have valid id. I.e. they are created from a sketch ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id()); - if(bia != nullptr) + if (bia != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id()); } @@ -178,16 +186,13 @@ Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op_helper(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes); } -ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *wei, - ITensorInfo *bia, - const Conv2dAttributes &attributes) +ITensorInfo *GpuConv2d::create_op( + GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *wei, ITensorInfo *bia, const Conv2dAttributes &attributes) { ARM_COMPUTE_LOG_PARAMS(src, wei, bia, attributes); PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, - attributes.pad().right, - attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); + attributes.pad().right, attributes.pad().top, attributes.pad().bottom, + DimensionRoundingType::FLOOR); // Initialize the direct convolution descriptor const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info); @@ -207,7 +212,7 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch, const auto gpu_target = sketch_ctx->gpu_target(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = sketch_ctx->cl_compile_context(); ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr); @@ -216,17 +221,17 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch, // Add Direct Conv2d Component { auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); auto settings = ClComponentDirectConv2d::Settings(); settings.fast_relaxed_math( - (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) - && (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16)); + (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) && + (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16)); settings.direct_conv_descriptor(desc); - if(settings.export_to_cl_image()) + if (settings.export_to_cl_image()) { arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei); } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp index c72098e943..e2b673bd43 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp @@ -28,8 +28,8 @@ #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h" #include "src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h" #include "src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h" @@ -42,20 +42,20 @@ namespace 
dynamic_fusion { namespace { -void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const DepthwiseConv2dAttributes &attributes) +void calculate_and_init_dst_if_empty(ITensorInfo *dst, + const ITensorInfo *src, + const ITensorInfo *wei, + const DepthwiseConv2dAttributes &attributes) { - if(dst->total_size() == 0U) + if (dst->total_size() == 0U) { - const PadStrideInfo pad_stride_info(attributes.stride().x(), - attributes.stride().y(), - attributes.pad().left, - attributes.pad().right, - attributes.pad().top, - attributes.pad().bottom, + const PadStrideInfo pad_stride_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, + attributes.pad().right, attributes.pad().top, attributes.pad().bottom, attributes.dimension_rounding_type()); - const ConvolutionInfo conv_info{ pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(), attributes.dilation() }; - const TensorShape shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info); + const ConvolutionInfo conv_info{pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(), + attributes.dilation()}; + const TensorShape shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info); auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape)); } @@ -76,7 +76,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } @@ -91,40 +91,44 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, const GpuTarget gpu_target = context.gpu_target(); - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { const CLCompileContext *cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); // Validate Depthwise Conv2d Component { - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); - auto settings = ClComponentDepthwiseConv2d::Settings(); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); + auto settings = ClComponentDepthwiseConv2d::Settings(); - const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, - attributes.pad().right, - attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); + const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), + attributes.pad().left, attributes.pad().right, attributes.pad().top, + attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); - const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + const DWCComputeKernelInfo dwc_info = + t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.fast_relaxed_math( - (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) - && (dst_info_to_validate_ptr->data_type() == DataType::F32 || 
dst_info_to_validate_ptr->data_type() == DataType::F16)); + (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) && + (dst_info_to_validate_ptr->data_type() == DataType::F32 || + dst_info_to_validate_ptr->data_type() == DataType::F16)); settings.is_fma_available(get_arch_from_target(gpu_target) == GPUTarget::MIDGARD) - .m0(dwc_info.m0) - .n0(dwc_info.n0) - .export_input_to_cl_image(dwc_info.export_input_to_cl_image) - .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image); + .m0(dwc_info.m0) + .n0(dwc_info.n0) + .export_input_to_cl_image(dwc_info.export_input_to_cl_image) + .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); arguments.add_const_tensor(ACL_SRC_1, wei); arguments.add_const_tensor(ACL_SRC_2, bia); arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr); - ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings)); + ARM_COMPUTE_RETURN_ON_ERROR( + ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings)); } } else @@ -158,7 +162,7 @@ Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch, ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id()); - if(bia != nullptr) + if (bia != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id()); } @@ -205,35 +209,37 @@ ITensorInfo *GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sket const auto *sketch_ctx = sketch.implementation().context(); const GpuTarget gpu_target = sketch_ctx->gpu_target(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context()); // Add Depthwise Conv2d Component { - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); - auto settings = ClComponentDepthwiseConv2d::Settings(); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); + auto settings = ClComponentDepthwiseConv2d::Settings(); - const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, - attributes.pad().right, - attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR); + const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), + attributes.pad().left, attributes.pad().right, attributes.pad().top, + attributes.pad().bottom, DimensionRoundingType::FLOOR); // Get the depthwise convolution compute parameters - auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); - const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); + auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target); + const DWCComputeKernelInfo dwc_info = + t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier()); settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) - .m0(dwc_info.m0) - .n0(dwc_info.n0) - .export_input_to_cl_image(dwc_info.export_input_to_cl_image) - .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image); + .m0(dwc_info.m0) + .n0(dwc_info.n0) + .export_input_to_cl_image(dwc_info.export_input_to_cl_image) + 
.export_weights_to_cl_image(dwc_info.export_weights_to_cl_image); - if(settings.export_input_to_cl_image()) + if (settings.export_input_to_cl_image()) { arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(src); } - if(settings.export_weights_to_cl_image()) + if (settings.export_weights_to_cl_image()) { arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei); } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp index 464a32cbad..b871171e8d 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" + #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" @@ -32,9 +33,7 @@ namespace experimental { namespace dynamic_fusion { -Status GpuMul::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *lhs, - const ITensorInfo *rhs) +Status GpuMul::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32); @@ -46,9 +45,7 @@ Status GpuMul::validate_op(const GpuWorkloadSketch &sketch, return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } -Status GpuMul::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *lhs, - const ITensorInfo *rhs) +Status GpuMul::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32); @@ -60,9 +57,7 @@ Status GpuMul::is_supported_op(const GpuWorkloadContext &context, return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } -ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs) +ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs) { // Set the elementwise operation to Mul then call the elementwise common create_op ElementwiseBinaryCommonAttributes common_attributes{}; diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp index 107a5e5fa7..f0d368d757 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp @@ -26,10 +26,9 @@ #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" - #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/utils/Utils.h" namespace arm_compute @@ -43,9 +42,7 @@ namespace constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; } // namespace -Status GpuOutput::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst) +Status GpuOutput::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); @@ -60,9 +57,7 @@ Status 
GpuOutput::is_supported_op(const GpuWorkloadContext &context, return Status{}; } -Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *src, - const ITensorInfo *dst) +Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); @@ -90,9 +85,7 @@ Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch, return status; } -void GpuOutput::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst) +void GpuOutput::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *dst) { ARM_COMPUTE_LOG_PARAMS(src, dst); ARM_COMPUTE_ERROR_THROW_ON(GpuOutput::validate_op(sketch, src, dst)); @@ -104,14 +97,14 @@ void GpuOutput::create_op(GpuWorkloadSketch &sketch, auto &comp_graph = sketch.implementation().component_graph(); const auto sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { ARM_COMPUTE_ERROR_ON(sketch_ctx->cl_compile_context() == nullptr); // Add store component { IGpuKernelComponent::Properties properties; - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp index 7ecfa0158b..55c604aacc 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp @@ -22,20 +22,21 @@ * SOFTWARE. */ +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" + #include "arm_compute/core/CL/CLCompileContext.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h" - #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" + #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h" #include "src/dynamic_fusion/utils/Utils.h" namespace arm_compute @@ -46,11 +47,15 @@ namespace dynamic_fusion { namespace { -void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const Pool2dAttributes &attributes, const GpuPool2dSettings &settings) +void calculate_and_init_dst_if_empty(ITensorInfo *dst, + const ITensorInfo *src, + const Pool2dAttributes &attributes, + const GpuPool2dSettings &settings) { - if(dst->total_size() == 0U) + if (dst->total_size() == 0U) { - auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())); + auto shape = misc::shape_calculator::compute_pool_shape( + *src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())); auto_init_if_empty(*dst, 
src->clone()->set_tensor_shape(shape)); } } @@ -82,7 +87,7 @@ bool GpuPool2dSettings::use_inf_as_limit() const Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, - const Pool2dAttributes &attributes, + const Pool2dAttributes &attributes, const GpuPool2dSettings &settings) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); @@ -110,7 +115,7 @@ Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch, Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const Pool2dAttributes &attributes, - const GpuPool2dSettings &settings) + const GpuPool2dSettings &settings) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); // Data type @@ -118,7 +123,8 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context, // Data layout ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC); // Check exclude padding is not false - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(), "Exclude padding must be set to true in Attributes!"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(), + "Exclude padding must be set to true in Attributes!"); // Auto initialize dst tensor info TensorInfo dst_info_to_validate; @@ -126,14 +132,15 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context, calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes, settings); // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); // Validate Component { - const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + const KernelProperties properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); @@ -148,10 +155,10 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context, return Status{}; } -ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - const Pool2dAttributes &attributes, - const GpuPool2dSettings &settings) +ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *src, + const Pool2dAttributes &attributes, + const GpuPool2dSettings &settings) { // Assert validation ARM_COMPUTE_ERROR_THROW_ON(GpuPool2d::validate_op(sketch, src, attributes, settings)); @@ -168,7 +175,7 @@ ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch, const auto sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = sketch_ctx->cl_compile_context(); ARM_COMPUTE_UNUSED(cl_compile_ctx); @@ -177,7 +184,7 @@ ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch, // Add Component { auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp index 0f43a578df..3def7a1a81 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp @@ 
-22,12 +22,14 @@ * SOFTWARE. */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h" + #include "arm_compute/core/Error.h" + #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" namespace arm_compute { @@ -40,14 +42,14 @@ namespace Status is_supported_op_helper(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst, - const ReshapeAttributes &attributes) + const ReshapeAttributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } @@ -55,7 +57,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape())); // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); @@ -78,16 +80,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, GpuOperatorType operator_type = GpuOperatorType::Complex; } // namespace -Status GpuReshape::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const Attributes &attributes) +Status +GpuReshape::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const Attributes &attributes) { return is_supported_op_helper(context, src, nullptr, attributes); } -Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *src, - const Attributes &attributes) +Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const Attributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); @@ -111,9 +110,7 @@ Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - const Attributes &attributes) +ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const Attributes &attributes) { ARM_COMPUTE_ERROR_ON_NULLPTR(src); ARM_COMPUTE_LOG_PARAMS(src, attributes.shape()); @@ -127,7 +124,7 @@ ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, // Translate into components and add to component graph auto &comp_graph = sketch.implementation().component_graph(); const auto sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = sketch_ctx->cl_compile_context(); ARM_COMPUTE_UNUSED(cl_compile_ctx); @@ -136,7 +133,7 @@ ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, // Add ElementwiseBinary Component { auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); 
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp index 5f52eea7d0..fb09875b33 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp @@ -26,12 +26,12 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" + +#include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h" - -#include "src/common/utils/Log.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" namespace arm_compute { @@ -43,7 +43,7 @@ namespace { void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ResizeAttributes &attributes) { - if(dst->total_size() == 0U) + if (dst->total_size() == 0U) { TensorShape out_shape = src->tensor_shape(); @@ -64,7 +64,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } @@ -73,22 +73,25 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, // Check support level // Data type - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::U8, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::U8, DataType::S16, DataType::F16, DataType::F32); // Data layout ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC); // Interpolation policy - ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.interpolation_policy() != InterpolationPolicy::NEAREST_NEIGHBOR && attributes.interpolation_policy() != InterpolationPolicy::BILINEAR, + ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.interpolation_policy() != InterpolationPolicy::NEAREST_NEIGHBOR && + attributes.interpolation_policy() != InterpolationPolicy::BILINEAR, "Interpolation policy must be NEAREST_NEIGHBOR or BILINEAR"); // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); // Validate Activation Component { - const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + const KernelProperties properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); @@ -107,16 +110,14 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, constexpr GpuOperatorType operator_type = GpuOperatorType::Complex; } // namespace -Status GpuResize::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src, - const Attributes &attributes) +Status +GpuResize::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const Attributes &attributes) { return is_supported_op_helper(context, src, nullptr, attributes); } -Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *src, - const GpuResize::Attributes &attributes) 
+Status +GpuResize::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const GpuResize::Attributes &attributes) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id()); @@ -141,9 +142,7 @@ Status GpuResize::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - const GpuResize::Attributes &attributes) +ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const GpuResize::Attributes &attributes) { ARM_COMPUTE_ERROR_ON_NULLPTR(src); ARM_COMPUTE_LOG_PARAMS(src, attributes); @@ -159,13 +158,14 @@ ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph(); const auto *sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context()); // Add Resize Component { - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, src); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp index 09debad969..a2260c8c36 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp @@ -23,14 +23,15 @@ */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h" + #include "arm_compute/core/experimental/Types.h" #include "arm_compute/function_info/ActivationLayerInfo.h" #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" namespace arm_compute { @@ -40,9 +41,7 @@ namespace dynamic_fusion { namespace { -Status is_supported_op_helper(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst) +Status is_supported_op_helper(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); @@ -50,20 +49,21 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } auto_init_if_empty(dst_info_to_validate, *src->clone()); - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LOGISTIC }; + const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC}; // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { // Validate Activation Component - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ 
UnitWorkloadStage::Stage::Run }); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC, src); @@ -80,14 +80,12 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; } // namespace -Status GpuSigmoid::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src) +Status GpuSigmoid::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src) { return is_supported_op_helper(context, src, nullptr); } -Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *src) +Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); @@ -112,8 +110,7 @@ Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate); } -ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src) +ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src) { ARM_COMPUTE_ERROR_ON_NULLPTR(src); ARM_COMPUTE_LOG_PARAMS(src); @@ -128,15 +125,15 @@ ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch, // Translate into components and add to component graph GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph(); - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LOGISTIC }; + const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC}; const auto *const sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { // Add Activation Component auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC, src); diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp index ffc4553a7d..c87b282aec 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp @@ -22,13 +22,14 @@ * SOFTWARE. 
*/ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h" + #include "arm_compute/core/Error.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" #include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" @@ -52,7 +53,7 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; // Auto initialize dst tensor info - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate = *dst; } @@ -61,11 +62,12 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context, auto_init_if_empty(dst_info_to_validate, *src->clone()); } // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); - const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + const KernelProperties properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); TensorShape logits_sum_shape = src->tensor_shape(); TensorInfo logits(src->clone()->set_tensor_shape(logits_sum_shape)); @@ -86,7 +88,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context, arguments_norm.add_const_tensor(ACL_SRC_1, &sum); arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate); - ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes)); + ARM_COMPUTE_RETURN_ON_ERROR( + ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes)); ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes)); } else @@ -105,14 +108,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch, ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id()); ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->num_dimensions() > 4, "Only up to 4 dimensions are supported"); - ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) || static_cast<int32_t>(src->num_dimensions()) <= attributes.axis()); + ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) || + static_cast<int32_t>(src->num_dimensions()) <= attributes.axis()); // Auto initialize dst tensor info TensorInfo dst_info_to_validate = *dst; auto_init_if_empty(dst_info_to_validate, *src->clone()); - const size_t actual_axis = static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions()))); - const bool needs_permute = actual_axis != 0; + const size_t actual_axis = + static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions()))); + const bool needs_permute = actual_axis != 0; ARM_COMPUTE_RETURN_ERROR_ON_MSG(needs_permute, "Dynamic fusion softmax on axis!=0 not supported yet."); // Perform fusion test and check if the 
operator meets the fusion constraints @@ -128,17 +133,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes); } -void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src, - ITensorInfo *dst, - const Attributes &attributes) +void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *dst, const Attributes &attributes) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_LOG_PARAMS(src, dst, attributes); TensorShape logits_sum_shape = src->tensor_shape(); - ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape)); + ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor( + src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape)); logits_sum_shape.set(0, 1); - ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape)); + ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor( + src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape)); // Auto initialize dst tensor info and the auxiliary tensor infos as well auto_init_if_empty(*dst, *src->clone()); @@ -151,7 +155,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, auto &comp_graph = sketch.implementation().component_graph(); const auto sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = sketch_ctx->cl_compile_context(); ARM_COMPUTE_UNUSED(cl_compile_ctx); @@ -160,7 +164,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, // Add Direct Conv2d Component { auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments_exp_sum; ArgumentPack<ITensorInfo> arguments_norm; diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp index 8240008f2a..e5d62c9930 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h" + #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" @@ -32,12 +33,11 @@ namespace experimental { namespace dynamic_fusion { -Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *lhs, - const ITensorInfo *rhs) +Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, + DataType::S16, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); // Set the elementwise operation to Sub then call the elementwise common validate_op @@ -46,12 +46,11 @@ Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } -Status GpuSub::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *lhs, - const ITensorInfo *rhs) +Status GpuSub::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, + DataType::S16, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); // Set the elementwise operation to Sub then call the elementwise common is_supported_op @@ -60,9 +59,7 @@ Status GpuSub::is_supported_op(const GpuWorkloadContext &context, return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } -ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *lhs, - ITensorInfo *rhs) +ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs) { // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() // Set the elementwise operation to Sub then call the elementwise common create_op diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp index c00716c76e..bf0f274c5c 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp @@ -23,14 +23,15 @@ */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuTanh.h" + #include "arm_compute/core/experimental/Types.h" +#include "src/common/utils/Log.h" +#include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/common/utils/Log.h" namespace arm_compute { @@ -40,9 +41,7 @@ namespace dynamic_fusion { namespace { -Status 
is_supported_op_helper(const GpuWorkloadContext &context, - const ITensorInfo *src, - const ITensorInfo *dst) +Status is_supported_op_helper(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); @@ -50,20 +49,21 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } auto_init_if_empty(dst_info_to_validate, *src->clone()); - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::TANH }; + const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::TANH}; // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { // Validate Activation Component - const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + const auto properties = + IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC, src); @@ -80,14 +80,12 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; } // namespace -Status GpuTanh::is_supported_op(const GpuWorkloadContext &context, - const ITensorInfo *src) +Status GpuTanh::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src) { return is_supported_op_helper(context, src, nullptr); } -Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch, - const ITensorInfo *src) +Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); @@ -112,8 +110,7 @@ Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch, return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate); } -ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch, - ITensorInfo *src) +ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src) { ARM_COMPUTE_ERROR_ON_NULLPTR(src); ARM_COMPUTE_LOG_PARAMS(src); @@ -128,15 +125,15 @@ ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch, // Translate into components and add to component graph GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph(); - const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::TANH }; + const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::TANH}; const auto *const sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { // Add Activation Component auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC, src); diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp index 7c087c9a7b..d79a4c42c9 100644 --- 
a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp @@ -22,11 +22,12 @@ * SOFTWARE. */ #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" + #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" namespace arm_compute { @@ -38,9 +39,10 @@ namespace { void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *lhs, const ITensorInfo *rhs) { - if(dst->total_size() == 0U) + if (dst->total_size() == 0U) { - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs, *rhs); + const std::pair<TensorShape, ValidRegion> broadcast_pair = + ITensorInfo::broadcast_shape_and_valid_region(*lhs, *rhs); auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(broadcast_pair.first)); } } @@ -56,7 +58,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, TensorInfo dst_info_to_validate; const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate; - if(dst != nullptr) + if (dst != nullptr) { dst_info_to_validate_ptr = dst; } @@ -64,7 +66,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs); // Check components - if(context.gpu_language() == GpuLanguage::OpenCL) + if (context.gpu_language() == GpuLanguage::OpenCL) { const auto cl_compile_ctx = context.cl_compile_context(); ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr); @@ -90,7 +92,8 @@ Status is_supported_op_helper(const GpuWorkloadContext &context, GpuOperatorType operator_type = GpuOperatorType::Simple; } // namespace -ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) +ElementwiseBinaryCommonAttributes & +ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation) { _operation = operation; return *this; @@ -157,14 +160,14 @@ ITensorInfo *GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch const auto sketch_ctx = sketch.implementation().context(); - if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL) + if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL) { ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context()); // Add ElementwiseBinary Component { auto properties = IGpuKernelComponent::Properties(); - properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run }); + properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run}); ArgumentPack<ITensorInfo> arguments; arguments.add_const_tensor(ACL_SRC_0, lhs); |
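
Usage sketch (illustrative, not part of this change): the operators touched above all expose the same is_supported_op / validate_op / create_op surface, and create_op returns the ITensorInfo that feeds the next operator recorded on the same GpuWorkloadSketch. The GpuSub and GpuTanh signatures below are taken from the hunks above; the GpuWorkloadContext/GpuWorkloadSketch construction, the record_sub_then_tanh helper name, and the requirement that the input ITensorInfos already be registered with the context (so they carry valid tensor ids) are assumptions about the surrounding API and may differ between releases.

// Illustrative sketch only: operator signatures come from this patch, the
// context/sketch setup is an assumption and may differ per ACL release.
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuTanh.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

// lhs/rhs are assumed to be ITensorInfo objects created through the same
// GpuWorkloadContext, so that they carry valid tensor ids.
void record_sub_then_tanh(GpuWorkloadContext &context, ITensorInfo *lhs, ITensorInfo *rhs)
{
    GpuWorkloadSketch sketch{&context};

    // Query support on the context, then validate against the sketch before recording.
    if (!GpuSub::is_supported_op(context, lhs, rhs) || !GpuSub::validate_op(sketch, lhs, rhs))
    {
        return;
    }

    // create_op() records the operator and returns the intermediate tensor info...
    ITensorInfo *sub_out = GpuSub::create_op(sketch, lhs, rhs);

    // ...which can be fed straight into the next operator fused on the same sketch.
    if (bool(GpuTanh::validate_op(sketch, sub_out)))
    {
        GpuTanh::create_op(sketch, sub_out);
    }
}

GpuSoftmax follows the same pattern, except that its create_op additionally allocates the auxiliary logits/sum tensors through sketch.implementation(), as shown in the hunks above.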